from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import calinski_harabasz_score, silhouette_score, davies_bouldin_score
%matplotlib inline
# Do not cap the number of columns pandas shows when a DataFrame is displayed
# (None = no limit); the merged table built later is very wide.
pd.set_option('display.max_columns', None)
I used Selenium and BeautifulSoup to scrape NBA.com. It's important to note that I used the data from the traditional, advanced, and scoring pages. The scoring and traditional pages use this chunk, while the advanced page uses the next chunk. For some reason, the HTML on the advanced page includes things that don't work with this chunk, so I had to modify it.
def scrape_nba_com(drive, table_name):
    """Turn the stats table of the page loaded in ``drive`` into a DataFrame.

    Parameters
    ----------
    drive
        Object exposing ``page_source`` (the callers in this file pass a
        Selenium Chrome driver) that holds the rendered HTML of the page.
    table_name : str
        CSS class of the ``<table>`` element to extract.

    Returns
    -------
    pandas.DataFrame
        One row per ``<tr>`` of the table body. Cells are kept as the raw
        text of each ``<td>`` (no numeric conversion). Column names are the
        ``<th>`` texts, skipping any header containing "RANK".

    Raises
    ------
    ValueError
        If the table, its ``<thead>`` or its ``<tbody>`` is not in the page.
    """
    soup = BeautifulSoup(drive.page_source, 'html.parser')

    table = soup.find("table", {"class": table_name})
    if table is None:
        # Fail with a clear message instead of "'NoneType' has no attribute 'find'".
        raise ValueError(f"No <table> with class {table_name!r} found in the page source")

    thead = table.find("thead")
    tbody = table.find("tbody")
    if thead is None or tbody is None:
        raise ValueError(f"Table {table_name!r} is missing its <thead> or <tbody>")

    # Header names, minus the "... RANK" headers that have no matching data cell.
    cleaned_headers = [th.text for th in thead.find_all("th") if "RANK" not in th.text]

    # One list of cell texts per table row.
    table_data = [[td.text for td in row.find_all("td")] for row in tbody.find_all("tr")]

    # Some pages list more headers than there are cells per row, which would
    # make the DataFrame constructor fail. Drop the surplus trailing headers;
    # this is a no-op when the counts already agree.
    # NOTE(review): assumes the surplus headers come last - confirm per page.
    if table_data:
        row_width = max(len(row) for row in table_data)
        if len(cleaned_headers) > row_width:
            cleaned_headers = cleaned_headers[:row_width]

    return pd.DataFrame(data=table_data, columns=cleaned_headers)
def scrape_nba_com_ad(drive, table_name, max_columns=24):
    """Scrape a stats table whose header row lists more names than data cells.

    Works like the plain table scraper, except that only the first
    ``max_columns`` header names are kept before the DataFrame is built.

    Parameters
    ----------
    drive
        Object exposing ``page_source`` (the callers in this file pass a
        Selenium Chrome driver) that holds the rendered HTML of the page.
    table_name : str
        CSS class of the ``<table>`` element to extract.
    max_columns : int or None, optional
        Number of leading header names to keep. Defaults to 24, the value
        this function previously hard-coded. ``None`` keeps every header.

    Returns
    -------
    pandas.DataFrame
        One row per ``<tr>`` of the table body, cells kept as raw text.

    Raises
    ------
    ValueError
        If the table, its ``<thead>`` or its ``<tbody>`` is not in the page.
    """
    soup = BeautifulSoup(drive.page_source, 'html.parser')

    table = soup.find("table", {"class": table_name})
    if table is None:
        # Fail with a clear message instead of "'NoneType' has no attribute 'find'".
        raise ValueError(f"No <table> with class {table_name!r} found in the page source")

    thead = table.find("thead")
    tbody = table.find("tbody")
    if thead is None or tbody is None:
        raise ValueError(f"Table {table_name!r} is missing its <thead> or <tbody>")

    # Header names, minus the "... RANK" headers.
    cleaned_headers = [th.text for th in thead.find_all("th") if "RANK" not in th.text]
    # This is the part that differs from the plain scraper: keep only the
    # leading headers so their count matches the cells in each row.
    cleaned_headers = cleaned_headers[:max_columns]

    # One list of cell texts per table row.
    table_data = [[td.text for td in row.find_all("td")] for row in tbody.find_all("tr")]

    return pd.DataFrame(data=table_data, columns=cleaned_headers)
Some important things to note: I only took data from the 2022-2023 season, and only from the traditional, advanced, and scoring pages. When you run the chunk below, the NBA.com page will open in a browser window. Click "Accept" on the cookie prompt, then change the drop-down next to "Page" from 1 to "All" so that all the players show up; otherwise, you will only get the first 50. This must be done for each of the chunks below that take you to the website. Once you have changed it to All, minimize the browser window and run the `df_2022 =` cell, and you should get the dataframe.
This one is for traditional
# Traditional stats page: per-game numbers, 2022-23 regular season, sorted by points.
url = "https://www.nba.com/stats/players/traditional?PerMode=PerGame&sort=PTS&dir=-1&SeasonType=Regular+Season&Season=2022-23"
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get(url)
# In the browser window that opens: accept the cookie prompt and set the page
# drop-down to "All" before the scrape below runs, or only the first page of
# players is captured.
df_2022 = scrape_nba_com(driver, "Crom_table__p1iZz") # Get the dataframe
# Close this browser session now that the table is captured; the later cells
# start their own driver, so leaving this one open only leaks a Chrome process.
# (If the scrape above fails, the window stays open so the page can be fixed
# and the scrape retried.)
driver.quit()
df_2022 # Check that it looks correct
| Player | Team | Age | GP | W | L | Min | PTS | FGM | FGA | FG% | 3PM | 3PA | 3P% | FTM | FTA | FT% | OREB | DREB | REB | AST | TOV | STL | BLK | PF | FP | DD2 | TD3 | +/- | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Joel Embiid | PHI | 29 | 66 | 43 | 23 | 34.6 | 33.1 | 11.0 | 20.1 | 54.8 | 1.0 | 3.0 | 33.0 | 10.0 | 11.7 | 85.7 | 1.7 | 8.4 | 10.2 | 4.2 | 3.4 | 1.0 | 1.7 | 3.1 | 56.2 | 39.0 | 1.0 | 6.4 |
| 1 | 2 | Luka Doncic | DAL | 24 | 66 | 33 | 33 | 36.2 | 32.4 | 10.9 | 22.0 | 49.6 | 2.8 | 8.2 | 34.2 | 7.8 | 10.5 | 74.2 | 0.8 | 7.8 | 8.6 | 8.0 | 3.6 | 1.4 | 0.5 | 2.5 | 56.8 | 36.0 | 10.0 | 1.9 |
| 2 | 3 | Damian Lillard | POR | 32 | 58 | 27 | 31 | 36.3 | 32.2 | 9.6 | 20.7 | 46.3 | 4.2 | 11.3 | 37.1 | 8.8 | 9.6 | 91.4 | 0.8 | 4.0 | 4.8 | 7.3 | 3.3 | 0.9 | 0.3 | 1.9 | 49.1 | 16.0 | 2.0 | 1.8 |
| 3 | 4 | Shai Gilgeous-Alexander | OKC | 24 | 68 | 33 | 35 | 35.5 | 31.4 | 10.4 | 20.3 | 51.0 | 0.9 | 2.5 | 34.5 | 9.8 | 10.9 | 90.5 | 0.9 | 4.0 | 4.8 | 5.5 | 2.8 | 1.6 | 1.0 | 2.8 | 50.4 | 3.0 | 0.0 | 2.2 |
| 4 | 5 | Giannis Antetokounmpo | MIL | 28 | 63 | 47 | 16 | 32.1 | 31.1 | 11.2 | 20.3 | 55.3 | 0.7 | 2.7 | 27.5 | 7.9 | 12.3 | 64.5 | 2.2 | 9.6 | 11.8 | 5.7 | 3.9 | 0.8 | 0.8 | 3.1 | 54.8 | 46.0 | 6.0 | 5.4 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 534 | 535 | Alondes Williams | BKN | 23 | 1 | 1 | 0 | 5.3 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 2.0 | 0.0 | 0.0 | 1.0 | -0.8 | 0.0 | 0.0 | -5.0 |
| 535 | 535 | Deonte Burton | SAC | 29 | 2 | 1 | 1 | 3.2 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.5 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.5 |
| 536 | 535 | Frank Jackson | UTA | 25 | 1 | 0 | 1 | 5.1 | 0.0 | 0.0 | 3.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 2.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.9 | 0.0 | 0.0 | -2.0 |
| 537 | 535 | Michael Foster Jr. | PHI | 20 | 1 | 1 | 0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -2.0 |
| 538 | 535 | Sterling Brown | LAL | 28 | 4 | 2 | 2 | 6.1 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.5 | 0.0 | 0.0 | 0.0 | 0.0 | 0.8 | 1.3 | 2.0 | 0.5 | 0.0 | 0.8 | 0.0 | 1.0 | 5.4 | 0.0 | 0.0 | -1.0 |
539 rows × 30 columns
This one is for advanced
# Advanced stats page: per-game mode, 2022-23 regular season.
url = "https://www.nba.com/stats/players/advanced?PerMode=PerGame&sort=PTS&dir=-1&SeasonType=Regular+Season&Season=2022-23"
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get(url)
# In the browser window that opens: accept the cookie prompt and set the page
# drop-down to "All" before the scrape below runs, or only the first page of
# players is captured.
# This page needs the header-trimming variant of the scraper.
ad_2022 = scrape_nba_com_ad(driver, "Crom_table__p1iZz")
# Close this browser session now that the table is captured; the next cell
# starts its own driver, so leaving this one open only leaks a Chrome process.
# (If the scrape above fails, the window stays open so the page can be fixed
# and the scrape retried.)
driver.quit()
ad_2022
| PLAYER | TEAM | AGE | GP | W | L | MIN | OFFRTG | DEFRTG | NETRTG | AST% | AST/TO | AST RATIO | OREB% | DREB% | REB% | TO RATIO | EFG% | TS% | USG% | PACE | PIE | POSS | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | A.J. Lawson | DAL | 22 | 15 | 5 | 10 | 7.2 | 96.3 | 116.4 | -20.1 | 3.2 | 0.67 | 3.8 | 4.6 | 15.2 | 9.1 | 5.8 | 61.4 | 58.9 | 18.9 | 105.96 | 7.6 | 240 | |
| 1 | AJ Green | MIL | 23 | 35 | 27 | 8 | 9.9 | 107.9 | 112.8 | -4.9 | 9.2 | 2.44 | 14.0 | 1.6 | 10.5 | 6.1 | 5.7 | 60.0 | 60.7 | 15.9 | 102.90 | 7.6 | 744 | |
| 2 | AJ Griffin | ATL | 19 | 72 | 34 | 38 | 19.5 | 113.6 | 112.0 | 1.5 | 7.0 | 1.74 | 10.9 | 2.6 | 8.0 | 5.3 | 6.3 | 56.0 | 57.7 | 17.4 | 102.67 | 7.7 | 3005 | |
| 3 | Aaron Gordon | DEN | 27 | 68 | 45 | 23 | 30.2 | 123.0 | 110.9 | 12.1 | 12.9 | 2.07 | 16.9 | 8.6 | 13.6 | 11.2 | 8.2 | 60.3 | 61.7 | 20.6 | 100.16 | 11.2 | 4288 | |
| 4 | Aaron Holiday | ATL | 26 | 63 | 32 | 31 | 13.4 | 110.8 | 110.0 | 0.9 | 13.5 | 2.47 | 24.9 | 2.8 | 5.9 | 4.3 | 10.1 | 50.0 | 52.8 | 12.9 | 103.27 | 5.4 | 1819 | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 534 | Zach Collins | SAS | 25 | 63 | 20 | 43 | 22.9 | 108.7 | 116.3 | -7.5 | 18.0 | 1.40 | 19.6 | 7.6 | 19.0 | 13.3 | 14.0 | 56.8 | 59.9 | 20.9 | 104.06 | 11.0 | 3121 | |
| 535 | Zach LaVine | CHI | 28 | 77 | 38 | 39 | 35.9 | 112.0 | 111.7 | 0.3 | 18.7 | 1.69 | 15.7 | 1.6 | 10.8 | 6.4 | 9.3 | 55.8 | 60.7 | 27.8 | 99.70 | 12.6 | 5750 | |
| 536 | Zeke Nnaji | DEN | 22 | 53 | 34 | 19 | 13.7 | 101.5 | 107.4 | -5.9 | 4.0 | 0.58 | 6.6 | 8.7 | 9.9 | 9.3 | 11.4 | 60.5 | 62.0 | 14.9 | 98.40 | 7.2 | 1488 | |
| 537 | Ziaire Williams | MEM | 21 | 37 | 21 | 16 | 15.2 | 106.9 | 112.2 | -5.2 | 8.6 | 0.95 | 12.6 | 2.8 | 10.5 | 6.7 | 13.4 | 49.2 | 51.1 | 17.8 | 102.80 | 5.2 | 1195 | |
| 538 | Zion Williamson | NOP | 22 | 29 | 17 | 12 | 33.0 | 116.2 | 108.4 | 7.8 | 23.1 | 1.34 | 16.5 | 6.0 | 14.8 | 10.4 | 12.3 | 61.5 | 65.2 | 28.8 | 102.17 | 17.1 | 2031 |
539 rows × 24 columns
This one is for scoring
# Scoring stats page: per-game mode, 2022-23 regular season.
url = "https://www.nba.com/stats/players/scoring?PerMode=PerGame&sort=PTS&dir=-1&SeasonType=Regular+Season&Season=2022-23"
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
driver.get(url)
# In the browser window that opens: accept the cookie prompt and set the page
# drop-down to "All" before the scrape below runs, or only the first page of
# players is captured.
score_2022 = scrape_nba_com(driver, "Crom_table__p1iZz")
# Close the browser session now that the table is captured, so no Chrome
# process is left running. (If the scrape above fails, the window stays open
# so the page can be fixed and the scrape retried.)
driver.quit()
score_2022
| Player | TEAM | AGE | GP | W | L | MIN | %FGA2PT | %FGA3PT | %PTS2PT | %PTS2PT MR | %PTS3PT | %PTSFBPs | %PTSFT | %PTSOFFTO | %PTSPITP | 2FGM%AST | 2FGM%UAST | 3FGM%AST | 3FGM%UAST | FGM%AST | FGM%UAST | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | A.J. Lawson | DAL | 22 | 15 | 5 | 10 | 7.2 | 43.2 | 56.8 | 42.9 | 0.0 | 53.6 | 19.6 | 3.5 | 3.6 | 42.9 | 50.0 | 50.0 | 100 | 0.0 | 72.7 | 27.3 | |
| 1 | AJ Green | MIL | 23 | 35 | 27 | 8 | 9.9 | 16.0 | 84.0 | 11.7 | 5.2 | 85.7 | 7.8 | 2.6 | 7.1 | 6.5 | 77.8 | 22.2 | 90.9 | 9.1 | 88.7 | 11.3 | |
| 2 | AJ Griffin | ATL | 19 | 72 | 34 | 38 | 19.5 | 51.4 | 48.6 | 46.0 | 9.7 | 47.4 | 10.5 | 6.6 | 11.0 | 36.3 | 55.8 | 44.2 | 89.1 | 10.9 | 69.4 | 30.6 | |
| 3 | Aaron Gordon | DEN | 27 | 68 | 45 | 23 | 30.2 | 77.3 | 22.7 | 66.5 | 2.5 | 16.2 | 14.3 | 17.3 | 12.6 | 64.0 | 63.1 | 36.9 | 73.3 | 26.7 | 64.6 | 35.4 | |
| 4 | Aaron Holiday | ATL | 26 | 63 | 32 | 31 | 13.4 | 60.0 | 40.0 | 45.3 | 7.3 | 43.7 | 6.9 | 11.0 | 13.8 | 38.1 | 21.4 | 78.6 | 86.1 | 13.9 | 46.7 | 53.3 | |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 534 | Zach Collins | SAS | 25 | 63 | 20 | 43 | 22.9 | 73.2 | 26.8 | 62.7 | 5.7 | 22.6 | 4.5 | 14.7 | 9.8 | 56.9 | 58.1 | 41.9 | 100 | 0.0 | 66.2 | 33.8 | |
| 535 | Zach LaVine | CHI | 28 | 77 | 38 | 39 | 35.9 | 60.8 | 39.2 | 49.0 | 12.1 | 32.0 | 15.7 | 19.0 | 17.0 | 36.9 | 33.5 | 66.5 | 64.7 | 35.3 | 42.9 | 57.1 | |
| 536 | Zeke Nnaji | DEN | 22 | 53 | 34 | 19 | 13.7 | 66.8 | 33.2 | 67.1 | 0.7 | 18.4 | 11.9 | 14.5 | 15.9 | 66.4 | 72.0 | 28.0 | 100 | 0.0 | 76.4 | 23.6 | |
| 537 | Ziaire Williams | MEM | 21 | 37 | 21 | 16 | 15.2 | 50.5 | 49.5 | 56.2 | 6.7 | 35.7 | 22.9 | 8.1 | 18.6 | 49.5 | 78.0 | 22.0 | 88.0 | 12.0 | 81.0 | 19.0 | |
| 538 | Zion Williamson | NOP | 22 | 29 | 17 | 12 | 33.0 | 95.9 | 4.1 | 73.7 | 0.3 | 2.8 | 11.1 | 23.5 | 14.3 | 73.5 | 45.7 | 54.3 | 85.7 | 14.3 | 46.7 | 53.3 |
539 rows × 23 columns
# Combine the three scraped tables into one, matching rows on the player name.
# The advanced table names its key column "PLAYER" while the other two use
# "Player", so the first join needs separate left/right keys. Column names
# shared between tables come out with pandas' default _x / _y suffixes.
traditional_advanced = df_2022.merge(ad_2022, left_on="Player", right_on="PLAYER")
data = traditional_advanced.merge(score_2022, on="Player")
data  # make sure it worked
| _x | Player | Team | Age | GP_x | W_x | L_x | Min | PTS | FGM | FGA | FG% | 3PM | 3PA | 3P% | FTM | FTA | FT% | OREB | DREB | REB | AST | TOV | STL | BLK | PF | FP | DD2 | TD3 | +/- | _y | PLAYER | TEAM_x | AGE_x | GP_y | W_y | L_y | MIN_x | OFFRTG | DEFRTG | NETRTG | AST% | AST/TO | AST RATIO | OREB% | DREB% | REB% | TO RATIO | EFG% | TS% | USG% | PACE | PIE | POSS | TEAM_y | AGE_y | GP | W | L | MIN_y | %FGA2PT | %FGA3PT | %PTS2PT | %PTS2PT MR | %PTS3PT | %PTSFBPs | %PTSFT | %PTSOFFTO | %PTSPITP | 2FGM%AST | 2FGM%UAST | 3FGM%AST | 3FGM%UAST | FGM%AST | FGM%UAST | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Joel Embiid | PHI | 29 | 66 | 43 | 23 | 34.6 | 33.1 | 11.0 | 20.1 | 54.8 | 1.0 | 3.0 | 33.0 | 10.0 | 11.7 | 85.7 | 1.7 | 8.4 | 10.2 | 4.2 | 3.4 | 1.0 | 1.7 | 3.1 | 56.2 | 39.0 | 1.0 | 6.4 | Joel Embiid | PHI | 29 | 66 | 43 | 23 | 34.6 | 119.0 | 110.2 | 8.8 | 23.3 | 1.21 | 12.7 | 5.7 | 24.3 | 15.7 | 10.5 | 57.3 | 65.5 | 37.0 | 97.34 | 21.3 | 4639 | PHI | 29 | 66 | 43 | 23 | 34.6 | 84.9 | 15.1 | 60.7 | 15.7 | 9.1 | 6.3 | 30.2 | 12.7 | 45.0 | 60.0 | 40.0 | 89.4 | 10.6 | 62.6 | 37.4 | ||
| 1 | 2 | Luka Doncic | DAL | 24 | 66 | 33 | 33 | 36.2 | 32.4 | 10.9 | 22.0 | 49.6 | 2.8 | 8.2 | 34.2 | 7.8 | 10.5 | 74.2 | 0.8 | 7.8 | 8.6 | 8.0 | 3.6 | 1.4 | 0.5 | 2.5 | 56.8 | 36.0 | 10.0 | 1.9 | Luka Doncic | DAL | 24 | 66 | 33 | 33 | 36.2 | 118.1 | 116.0 | 2.1 | 40.8 | 2.24 | 21.1 | 2.4 | 22.4 | 12.4 | 9.4 | 56.0 | 60.9 | 36.8 | 97.63 | 20.2 | 4874 | DAL | 24 | 66 | 33 | 33 | 36.2 | 62.7 | 37.3 | 50.0 | 8.5 | 26.0 | 5.2 | 24.0 | 14.0 | 41.4 | 13.1 | 86.9 | 21.6 | 78.4 | 15.3 | 84.7 | ||
| 2 | 3 | Damian Lillard | POR | 32 | 58 | 27 | 31 | 36.3 | 32.2 | 9.6 | 20.7 | 46.3 | 4.2 | 11.3 | 37.1 | 8.8 | 9.6 | 91.4 | 0.8 | 4.0 | 4.8 | 7.3 | 3.3 | 0.9 | 0.3 | 1.9 | 49.1 | 16.0 | 2.0 | 1.8 | Damian Lillard | POR | 32 | 58 | 27 | 31 | 36.3 | 119.5 | 117.4 | 2.1 | 33.8 | 2.23 | 20.8 | 2.2 | 11.2 | 6.8 | 9.3 | 56.4 | 64.5 | 33.1 | 99.78 | 17.3 | 4385 | POR | 32 | 58 | 27 | 31 | 36.3 | 45.3 | 54.7 | 33.4 | 6.0 | 39.2 | 8.6 | 27.4 | 11.9 | 27.4 | 15.7 | 84.3 | 51.6 | 48.4 | 31.5 | 68.5 | ||
| 3 | 4 | Shai Gilgeous-Alexander | OKC | 24 | 68 | 33 | 35 | 35.5 | 31.4 | 10.4 | 20.3 | 51.0 | 0.9 | 2.5 | 34.5 | 9.8 | 10.9 | 90.5 | 0.9 | 4.0 | 4.8 | 5.5 | 2.8 | 1.6 | 1.0 | 2.8 | 50.4 | 3.0 | 0.0 | 2.2 | Shai Gilgeous-Alexander | OKC | 24 | 68 | 33 | 35 | 35.5 | 116.1 | 113.3 | 2.7 | 24.9 | 1.93 | 16.4 | 2.3 | 10.9 | 6.5 | 8.5 | 53.1 | 62.6 | 31.8 | 103.47 | 17.5 | 5211 | OKC | 24 | 68 | 33 | 35 | 35.5 | 87.8 | 12.2 | 60.5 | 9.7 | 8.1 | 12.7 | 31.4 | 17.7 | 50.8 | 20.1 | 79.9 | 31.0 | 69.0 | 21.0 | 79.0 | ||
| 4 | 5 | Giannis Antetokounmpo | MIL | 28 | 63 | 47 | 16 | 32.1 | 31.1 | 11.2 | 20.3 | 55.3 | 0.7 | 2.7 | 27.5 | 7.9 | 12.3 | 64.5 | 2.2 | 9.6 | 11.8 | 5.7 | 3.9 | 0.8 | 0.8 | 3.1 | 54.8 | 46.0 | 6.0 | 5.4 | Giannis Antetokounmpo | MIL | 28 | 63 | 47 | 16 | 32.1 | 116.4 | 109.2 | 7.2 | 31.4 | 1.46 | 16.2 | 6.5 | 26.8 | 17.1 | 11.1 | 57.2 | 60.5 | 37.3 | 103.63 | 20.4 | 4380 | MIL | 28 | 63 | 47 | 16 | 32.1 | 86.6 | 13.4 | 67.4 | 7.0 | 7.2 | 17.5 | 25.4 | 12.9 | 60.3 | 41.5 | 58.5 | 61.7 | 38.3 | 42.9 | 57.1 | ||
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 534 | 535 | Alondes Williams | BKN | 23 | 1 | 1 | 0 | 5.3 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 2.0 | 0.0 | 0.0 | 1.0 | -0.8 | 0.0 | 0.0 | -5.0 | Alondes Williams | BKN | 23 | 1 | 1 | 0 | 5.3 | 83.3 | 136.4 | -53.0 | 0.0 | 0.00 | 0.0 | 0.0 | 25.0 | 11.1 | 100.0 | 0.0 | 0.0 | 14.3 | 104.81 | -10.8 | 12 | BKN | 23 | 1 | 1 | 0 | 5.3 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 100 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ||
| 535 | 535 | Deonte Burton | SAC | 29 | 2 | 1 | 1 | 3.2 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.5 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.5 | Deonte Burton | SAC | 29 | 2 | 1 | 1 | 3.2 | 164.3 | 112.5 | 51.8 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 13.3 | 110.80 | -4.5 | 14 | SAC | 29 | 2 | 1 | 1 | 3.2 | 50.0 | 50.0 | 0.0 | 0.0 | 0.0 | 0.0 | 100 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ||
| 536 | 535 | Frank Jackson | UTA | 25 | 1 | 0 | 1 | 5.1 | 0.0 | 0.0 | 3.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 2.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.9 | 0.0 | 0.0 | -2.0 | Frank Jackson | UTA | 25 | 1 | 0 | 1 | 5.1 | 100.0 | 122.2 | -22.2 | 25.0 | 0.00 | 25.0 | 12.5 | 33.3 | 18.2 | 0.0 | 0.0 | 0.0 | 25.0 | 85.54 | -2.6 | 9 | UTA | 25 | 1 | 0 | 1 | 5.1 | 66.7 | 33.3 | 0.0 | 0.0 | 0.0 | 0.0 | 100 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ||
| 537 | 535 | Michael Foster Jr. | PHI | 20 | 1 | 1 | 0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | -2.0 | Michael Foster Jr. | PHI | 20 | 1 | 1 | 0 | 1.0 | 0.0 | 100.0 | -100.0 | 0.0 | 0.00 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 92.90 | 0.0 | 2 | PHI | 20 | 1 | 1 | 0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 100 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ||
| 538 | 535 | Sterling Brown | LAL | 28 | 4 | 2 | 2 | 6.1 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.5 | 0.0 | 0.0 | 0.0 | 0.0 | 0.8 | 1.3 | 2.0 | 0.5 | 0.0 | 0.8 | 0.0 | 1.0 | 5.4 | 0.0 | 0.0 | -1.0 | Sterling Brown | LAL | 28 | 4 | 2 | 2 | 6.1 | 106.8 | 106.3 | 0.4 | 7.7 | 0.00 | 33.3 | 9.1 | 15.2 | 12.1 | 0.0 | 0.0 | 0.0 | 6.1 | 120.18 | 3.1 | 59 | LAL | 28 | 4 | 2 | 2 | 6.1 | 50.0 | 50.0 | 0.0 | 0.0 | 0.0 | 0.0 | 100 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
539 rows × 76 columns
# Columns the merges left behind that are either duplicated across the three
# source tables or not needed for the analysis. A different join in the merge
# step could avoid creating some of them in the first place.
redundant_columns = [
    '\xa0_x', '\xa0_y',
    'GP_x', 'W_x', 'L_x', 'TEAM_x', 'PLAYER', 'AGE_x',
    'GP_y', 'W_y', 'L_y', 'MIN_x', 'TEAM_y', 'AGE_y', 'MIN_y', ' ',
]
data = data.drop(columns=redundant_columns)
data.head()  # Make sure everything looks nice
| Player | Team | Age | Min | PTS | FGM | FGA | FG% | 3PM | 3PA | 3P% | FTM | FTA | FT% | OREB | DREB | REB | AST | TOV | STL | BLK | PF | FP | DD2 | TD3 | +/- | OFFRTG | DEFRTG | NETRTG | AST% | AST/TO | AST RATIO | OREB% | DREB% | REB% | TO RATIO | EFG% | TS% | USG% | PACE | PIE | POSS | GP | W | L | %FGA2PT | %FGA3PT | %PTS2PT | %PTS2PT MR | %PTS3PT | %PTSFBPs | %PTSFT | %PTSOFFTO | %PTSPITP | 2FGM%AST | 2FGM%UAST | 3FGM%AST | 3FGM%UAST | FGM%AST | FGM%UAST | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Joel Embiid | PHI | 29 | 34.6 | 33.1 | 11.0 | 20.1 | 54.8 | 1.0 | 3.0 | 33.0 | 10.0 | 11.7 | 85.7 | 1.7 | 8.4 | 10.2 | 4.2 | 3.4 | 1.0 | 1.7 | 3.1 | 56.2 | 39.0 | 1.0 | 6.4 | 119.0 | 110.2 | 8.8 | 23.3 | 1.21 | 12.7 | 5.7 | 24.3 | 15.7 | 10.5 | 57.3 | 65.5 | 37.0 | 97.34 | 21.3 | 4639 | 66 | 43 | 23 | 84.9 | 15.1 | 60.7 | 15.7 | 9.1 | 6.3 | 30.2 | 12.7 | 45.0 | 60.0 | 40.0 | 89.4 | 10.6 | 62.6 | 37.4 |
| 1 | Luka Doncic | DAL | 24 | 36.2 | 32.4 | 10.9 | 22.0 | 49.6 | 2.8 | 8.2 | 34.2 | 7.8 | 10.5 | 74.2 | 0.8 | 7.8 | 8.6 | 8.0 | 3.6 | 1.4 | 0.5 | 2.5 | 56.8 | 36.0 | 10.0 | 1.9 | 118.1 | 116.0 | 2.1 | 40.8 | 2.24 | 21.1 | 2.4 | 22.4 | 12.4 | 9.4 | 56.0 | 60.9 | 36.8 | 97.63 | 20.2 | 4874 | 66 | 33 | 33 | 62.7 | 37.3 | 50.0 | 8.5 | 26.0 | 5.2 | 24.0 | 14.0 | 41.4 | 13.1 | 86.9 | 21.6 | 78.4 | 15.3 | 84.7 |
| 2 | Damian Lillard | POR | 32 | 36.3 | 32.2 | 9.6 | 20.7 | 46.3 | 4.2 | 11.3 | 37.1 | 8.8 | 9.6 | 91.4 | 0.8 | 4.0 | 4.8 | 7.3 | 3.3 | 0.9 | 0.3 | 1.9 | 49.1 | 16.0 | 2.0 | 1.8 | 119.5 | 117.4 | 2.1 | 33.8 | 2.23 | 20.8 | 2.2 | 11.2 | 6.8 | 9.3 | 56.4 | 64.5 | 33.1 | 99.78 | 17.3 | 4385 | 58 | 27 | 31 | 45.3 | 54.7 | 33.4 | 6.0 | 39.2 | 8.6 | 27.4 | 11.9 | 27.4 | 15.7 | 84.3 | 51.6 | 48.4 | 31.5 | 68.5 |
| 3 | Shai Gilgeous-Alexander | OKC | 24 | 35.5 | 31.4 | 10.4 | 20.3 | 51.0 | 0.9 | 2.5 | 34.5 | 9.8 | 10.9 | 90.5 | 0.9 | 4.0 | 4.8 | 5.5 | 2.8 | 1.6 | 1.0 | 2.8 | 50.4 | 3.0 | 0.0 | 2.2 | 116.1 | 113.3 | 2.7 | 24.9 | 1.93 | 16.4 | 2.3 | 10.9 | 6.5 | 8.5 | 53.1 | 62.6 | 31.8 | 103.47 | 17.5 | 5211 | 68 | 33 | 35 | 87.8 | 12.2 | 60.5 | 9.7 | 8.1 | 12.7 | 31.4 | 17.7 | 50.8 | 20.1 | 79.9 | 31.0 | 69.0 | 21.0 | 79.0 |
| 4 | Giannis Antetokounmpo | MIL | 28 | 32.1 | 31.1 | 11.2 | 20.3 | 55.3 | 0.7 | 2.7 | 27.5 | 7.9 | 12.3 | 64.5 | 2.2 | 9.6 | 11.8 | 5.7 | 3.9 | 0.8 | 0.8 | 3.1 | 54.8 | 46.0 | 6.0 | 5.4 | 116.4 | 109.2 | 7.2 | 31.4 | 1.46 | 16.2 | 6.5 | 26.8 | 17.1 | 11.1 | 57.2 | 60.5 | 37.3 | 103.63 | 20.4 | 4380 | 63 | 47 | 16 | 86.6 | 13.4 | 67.4 | 7.0 | 7.2 | 17.5 | 25.4 | 12.9 | 60.3 | 41.5 | 58.5 | 61.7 | 38.3 | 42.9 | 57.1 |
# The merged table is cached in a CSV so the scraping cells above do not have
# to be rerun. Uncomment the next line to (re)write the cache file.
#data.to_csv('data.csv', index=False)
csv_path = 'data.csv'
df = pd.read_csv(csv_path)  # Load the previously written cache
df.head()  # Check that it looks correct
| Player | Team | Age | Min | PTS | FGM | FGA | FG% | 3PM | 3PA | 3P% | FTM | FTA | FT% | OREB | DREB | REB | AST | TOV | STL | BLK | PF | FP | DD2 | TD3 | +/- | OFFRTG | DEFRTG | NETRTG | AST% | AST/TO | AST RATIO | OREB% | DREB% | REB% | TO RATIO | EFG% | TS% | USG% | PACE | PIE | POSS | GP | W | L | %FGA2PT | %FGA3PT | %PTS2PT | %PTS2PT MR | %PTS3PT | %PTSFBPs | %PTSFT | %PTSOFFTO | %PTSPITP | 2FGM%AST | 2FGM%UAST | 3FGM%AST | 3FGM%UAST | FGM%AST | FGM%UAST | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Joel Embiid | PHI | 29 | 34.6 | 33.1 | 11.0 | 20.1 | 54.8 | 1.0 | 3.0 | 33.0 | 10.0 | 11.7 | 85.7 | 1.7 | 8.4 | 10.2 | 4.2 | 3.4 | 1.0 | 1.7 | 3.1 | 56.2 | 39.0 | 1.0 | 6.4 | 119.0 | 110.2 | 8.8 | 23.3 | 1.21 | 12.7 | 5.7 | 24.3 | 15.7 | 10.5 | 57.3 | 65.5 | 37.0 | 97.34 | 21.3 | 4639 | 66 | 43 | 23 | 84.9 | 15.1 | 60.7 | 15.7 | 9.1 | 6.3 | 30.2 | 12.7 | 45.0 | 60.0 | 40.0 | 89.4 | 10.6 | 62.6 | 37.4 |
| 1 | Luka Doncic | DAL | 24 | 36.2 | 32.4 | 10.9 | 22.0 | 49.6 | 2.8 | 8.2 | 34.2 | 7.8 | 10.5 | 74.2 | 0.8 | 7.8 | 8.6 | 8.0 | 3.6 | 1.4 | 0.5 | 2.5 | 56.8 | 36.0 | 10.0 | 1.9 | 118.1 | 116.0 | 2.1 | 40.8 | 2.24 | 21.1 | 2.4 | 22.4 | 12.4 | 9.4 | 56.0 | 60.9 | 36.8 | 97.63 | 20.2 | 4874 | 66 | 33 | 33 | 62.7 | 37.3 | 50.0 | 8.5 | 26.0 | 5.2 | 24.0 | 14.0 | 41.4 | 13.1 | 86.9 | 21.6 | 78.4 | 15.3 | 84.7 |
| 2 | Damian Lillard | POR | 32 | 36.3 | 32.2 | 9.6 | 20.7 | 46.3 | 4.2 | 11.3 | 37.1 | 8.8 | 9.6 | 91.4 | 0.8 | 4.0 | 4.8 | 7.3 | 3.3 | 0.9 | 0.3 | 1.9 | 49.1 | 16.0 | 2.0 | 1.8 | 119.5 | 117.4 | 2.1 | 33.8 | 2.23 | 20.8 | 2.2 | 11.2 | 6.8 | 9.3 | 56.4 | 64.5 | 33.1 | 99.78 | 17.3 | 4385 | 58 | 27 | 31 | 45.3 | 54.7 | 33.4 | 6.0 | 39.2 | 8.6 | 27.4 | 11.9 | 27.4 | 15.7 | 84.3 | 51.6 | 48.4 | 31.5 | 68.5 |
| 3 | Shai Gilgeous-Alexander | OKC | 24 | 35.5 | 31.4 | 10.4 | 20.3 | 51.0 | 0.9 | 2.5 | 34.5 | 9.8 | 10.9 | 90.5 | 0.9 | 4.0 | 4.8 | 5.5 | 2.8 | 1.6 | 1.0 | 2.8 | 50.4 | 3.0 | 0.0 | 2.2 | 116.1 | 113.3 | 2.7 | 24.9 | 1.93 | 16.4 | 2.3 | 10.9 | 6.5 | 8.5 | 53.1 | 62.6 | 31.8 | 103.47 | 17.5 | 5211 | 68 | 33 | 35 | 87.8 | 12.2 | 60.5 | 9.7 | 8.1 | 12.7 | 31.4 | 17.7 | 50.8 | 20.1 | 79.9 | 31.0 | 69.0 | 21.0 | 79.0 |
| 4 | Giannis Antetokounmpo | MIL | 28 | 32.1 | 31.1 | 11.2 | 20.3 | 55.3 | 0.7 | 2.7 | 27.5 | 7.9 | 12.3 | 64.5 | 2.2 | 9.6 | 11.8 | 5.7 | 3.9 | 0.8 | 0.8 | 3.1 | 54.8 | 46.0 | 6.0 | 5.4 | 116.4 | 109.2 | 7.2 | 31.4 | 1.46 | 16.2 | 6.5 | 26.8 | 17.1 | 11.1 | 57.2 | 60.5 | 37.3 | 103.63 | 20.4 | 4380 | 63 | 47 | 16 | 86.6 | 13.4 | 67.4 | 7.0 | 7.2 | 17.5 | 25.4 | 12.9 | 60.3 | 41.5 | 58.5 | 61.7 | 38.3 | 42.9 | 57.1 |
df.info() # Check column dtypes and non-null counts; every column reports 539 non-null, so there are no missing values
<class 'pandas.core.frame.DataFrame'> RangeIndex: 539 entries, 0 to 538 Data columns (total 60 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Player 539 non-null object 1 Team 539 non-null object 2 Age 539 non-null int64 3 Min 539 non-null float64 4 PTS 539 non-null float64 5 FGM 539 non-null float64 6 FGA 539 non-null float64 7 FG% 539 non-null float64 8 3PM 539 non-null float64 9 3PA 539 non-null float64 10 3P% 539 non-null float64 11 FTM 539 non-null float64 12 FTA 539 non-null float64 13 FT% 539 non-null float64 14 OREB 539 non-null float64 15 DREB 539 non-null float64 16 REB 539 non-null float64 17 AST 539 non-null float64 18 TOV 539 non-null float64 19 STL 539 non-null float64 20 BLK 539 non-null float64 21 PF 539 non-null float64 22 FP 539 non-null float64 23 DD2 539 non-null float64 24 TD3 539 non-null float64 25 +/- 539 non-null float64 26 OFFRTG 539 non-null float64 27 DEFRTG 539 non-null float64 28 NETRTG 539 non-null float64 29 AST% 539 non-null float64 30 AST/TO 539 non-null float64 31 AST RATIO 539 non-null float64 32 OREB% 539 non-null float64 33 DREB% 539 non-null float64 34 REB% 539 non-null float64 35 TO RATIO 539 non-null float64 36 EFG% 539 non-null float64 37 TS% 539 non-null float64 38 USG% 539 non-null float64 39 PACE 539 non-null float64 40 PIE 539 non-null float64 41 POSS 539 non-null int64 42 GP 539 non-null int64 43 W 539 non-null int64 44 L 539 non-null int64 45 %FGA2PT 539 non-null float64 46 %FGA3PT 539 non-null float64 47 %PTS2PT 539 non-null float64 48 %PTS2PT MR 539 non-null float64 49 %PTS3PT 539 non-null float64 50 %PTSFBPs 539 non-null float64 51 %PTSFT 539 non-null float64 52 %PTSOFFTO 539 non-null float64 53 %PTSPITP 539 non-null float64 54 2FGM%AST 539 non-null float64 55 2FGM%UAST 539 non-null float64 56 3FGM%AST 539 non-null float64 57 3FGM%UAST 539 non-null float64 58 FGM%AST 539 non-null float64 59 FGM%UAST 539 non-null float64 dtypes: float64(53), int64(5), object(2) memory usage: 252.8+ KB
df.describe() # Descriptive statistics (count, mean, std, min, quartiles, max) for each numeric column
| Age | Min | PTS | FGM | FGA | FG% | 3PM | 3PA | 3P% | FTM | FTA | FT% | OREB | DREB | REB | AST | TOV | STL | BLK | PF | FP | DD2 | TD3 | +/- | OFFRTG | DEFRTG | NETRTG | AST% | AST/TO | AST RATIO | OREB% | DREB% | REB% | TO RATIO | EFG% | TS% | USG% | PACE | PIE | POSS | GP | W | L | %FGA2PT | %FGA3PT | %PTS2PT | %PTS2PT MR | %PTS3PT | %PTSFBPs | %PTSFT | %PTSOFFTO | %PTSPITP | 2FGM%AST | 2FGM%UAST | 3FGM%AST | 3FGM%UAST | FGM%AST | FGM%UAST | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 | 539.000000 |
| mean | 26.053803 | 19.796846 | 9.121336 | 3.348980 | 7.104824 | 46.325232 | 0.990538 | 2.782560 | 31.530612 | 1.434694 | 1.856957 | 71.993506 | 0.866234 | 2.676809 | 3.543228 | 2.068831 | 1.103711 | 0.609647 | 0.382375 | 1.687941 | 18.330427 | 4.011132 | 0.220779 | -0.375139 | 109.877180 | 111.433395 | -1.555288 | 13.700742 | 1.819555 | 17.541929 | 4.495176 | 13.224304 | 8.822263 | 10.252319 | 53.187941 | 56.058442 | 17.893135 | 101.723840 | 8.835807 | 2319.506494 | 48.040816 | 24.018553 | 24.022263 | 59.598330 | 40.031169 | 50.936735 | 6.299443 | 34.061781 | 12.430241 | 15.001484 | 14.907792 | 44.634694 | 53.777365 | 43.255102 | 80.663080 | 10.432282 | 65.272542 | 33.615028 |
| std | 4.314633 | 9.539535 | 6.842216 | 2.436261 | 4.956886 | 10.967271 | 0.872442 | 2.244579 | 13.711120 | 1.545867 | 1.880916 | 21.280275 | 0.747543 | 1.773074 | 2.346968 | 1.932954 | 0.826909 | 0.398533 | 0.384414 | 0.792286 | 11.652091 | 8.770932 | 1.564432 | 3.166926 | 9.728805 | 7.555920 | 12.470820 | 8.667679 | 1.124435 | 8.363304 | 3.562915 | 5.740101 | 4.051268 | 6.032949 | 10.618005 | 10.303772 | 5.672946 | 3.538229 | 4.097701 | 1726.672874 | 24.650686 | 14.496366 | 13.445866 | 22.145153 | 21.980182 | 20.839758 | 7.429529 | 22.246019 | 10.036057 | 11.474000 | 6.961117 | 21.439216 | 22.183506 | 21.466402 | 29.034435 | 14.710900 | 19.759378 | 18.844581 |
| min | 19.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | -0.800000 | 0.000000 | 0.000000 | -29.000000 | 0.000000 | 60.000000 | -100.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 85.540000 | -10.800000 | 2.000000 | 1.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| 25% | 23.000000 | 12.450000 | 4.150000 | 1.600000 | 3.400000 | 41.650000 | 0.300000 | 1.000000 | 28.100000 | 0.500000 | 0.600000 | 66.700000 | 0.300000 | 1.400000 | 1.800000 | 0.800000 | 0.500000 | 0.300000 | 0.100000 | 1.200000 | 9.650000 | 0.000000 | 0.000000 | -1.900000 | 107.100000 | 109.200000 | -6.100000 | 7.400000 | 1.150000 | 11.900000 | 1.900000 | 9.350000 | 6.000000 | 7.800000 | 49.450000 | 52.350000 | 13.900000 | 99.760000 | 6.700000 | 699.000000 | 30.500000 | 12.000000 | 14.000000 | 45.100000 | 26.000000 | 38.000000 | 1.100000 | 17.600000 | 7.350000 | 9.500000 | 11.800000 | 30.400000 | 38.600000 | 28.600000 | 74.900000 | 0.000000 | 53.000000 | 20.100000 |
| 50% | 25.000000 | 19.200000 | 7.000000 | 2.700000 | 5.800000 | 45.500000 | 0.800000 | 2.400000 | 34.200000 | 0.900000 | 1.200000 | 76.300000 | 0.700000 | 2.300000 | 3.000000 | 1.400000 | 0.900000 | 0.600000 | 0.300000 | 1.600000 | 15.300000 | 0.000000 | 0.000000 | -0.200000 | 111.100000 | 112.200000 | -0.600000 | 11.400000 | 1.670000 | 16.400000 | 3.400000 | 12.000000 | 7.900000 | 9.900000 | 53.800000 | 56.700000 | 17.200000 | 101.670000 | 8.600000 | 2066.000000 | 54.000000 | 25.000000 | 25.000000 | 59.100000 | 40.800000 | 50.000000 | 4.500000 | 33.800000 | 11.300000 | 13.400000 | 14.700000 | 41.800000 | 56.800000 | 41.300000 | 93.100000 | 4.000000 | 67.100000 | 31.700000 |
| 75% | 29.000000 | 28.350000 | 12.100000 | 4.500000 | 9.450000 | 50.600000 | 1.500000 | 4.100000 | 38.500000 | 1.850000 | 2.400000 | 84.100000 | 1.100000 | 3.500000 | 4.500000 | 2.750000 | 1.500000 | 0.800000 | 0.500000 | 2.200000 | 25.100000 | 3.000000 | 0.000000 | 1.350000 | 114.300000 | 115.300000 | 3.300000 | 18.400000 | 2.300000 | 21.650000 | 6.100000 | 16.350000 | 11.100000 | 11.700000 | 57.600000 | 60.900000 | 20.600000 | 103.225000 | 10.900000 | 3879.500000 | 68.000000 | 36.000000 | 34.000000 | 73.600000 | 54.650000 | 64.950000 | 8.750000 | 48.700000 | 15.800000 | 18.200000 | 17.450000 | 58.550000 | 69.050000 | 58.450000 | 100.000000 | 14.550000 | 79.350000 | 45.600000 |
| max | 42.000000 | 40.800000 | 33.100000 | 11.200000 | 22.200000 | 100.000000 | 4.900000 | 11.400000 | 100.000000 | 10.000000 | 12.300000 | 100.000000 | 5.100000 | 9.600000 | 12.500000 | 10.700000 | 4.100000 | 3.000000 | 3.000000 | 5.000000 | 56.800000 | 65.000000 | 29.000000 | 9.300000 | 175.000000 | 163.600000 | 95.000000 | 45.700000 | 11.000000 | 58.300000 | 25.000000 | 40.000000 | 25.600000 | 100.000000 | 100.000000 | 106.400000 | 50.000000 | 127.290000 | 36.500000 | 6079.000000 | 83.000000 | 57.000000 | 60.000000 | 100.000000 | 100.000000 | 100.000000 | 66.700000 | 100.000000 | 100.000000 | 100.000000 | 66.700000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 | 100.000000 |
We can already see some important things from the descriptive statistics. The first is that many variables have a minimum of 0, meaning that some player averaged 0 points per game, 0 free throw attempts per game, or 0 of whatever the variable measures. That's not going to help us, because those players are generally "garbage time" players: they only play in the last few minutes when the score is lopsided. Even worse is a NETRTG of -100 or a PIE of -10.8. These are outliers caused by a lack of playing time, and they could affect what we are trying to do, so I will filter out players who don't play enough. The second is that there don't seem to be any data errors: there are no negative numbers where there shouldn't be, or anything of that nature. The third is that these variables are on different scales, so we may need to standardize or normalize them later on.
Let's look at the pairwise correlations and see which variables are highly correlated.
# Pairwise correlations between the numeric stat columns (DataFrame.corr's
# default method). Player and Team are string columns, so select the numeric
# columns explicitly: since pandas 2.0, DataFrame.corr() no longer drops
# non-numeric columns by default and raises ValueError on them instead.
correlations = df.select_dtypes(include="number").corr()
correlations  # Display the correlation matrix
| Age | Min | PTS | FGM | FGA | FG% | 3PM | 3PA | 3P% | FTM | FTA | FT% | OREB | DREB | REB | AST | TOV | STL | BLK | PF | FP | DD2 | TD3 | +/- | OFFRTG | DEFRTG | NETRTG | AST% | AST/TO | AST RATIO | OREB% | DREB% | REB% | TO RATIO | EFG% | TS% | USG% | PACE | PIE | POSS | GP | W | L | %FGA2PT | %FGA3PT | %PTS2PT | %PTS2PT MR | %PTS3PT | %PTSFBPs | %PTSFT | %PTSOFFTO | %PTSPITP | 2FGM%AST | 2FGM%UAST | 3FGM%AST | 3FGM%UAST | FGM%AST | FGM%UAST | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Age | 1.000000 | 0.141465 | 0.106178 | 0.093927 | 0.087601 | 0.069254 | 0.154838 | 0.136059 | 0.103236 | 0.086528 | 0.070279 | 0.130108 | -0.010731 | 0.115184 | 0.083177 | 0.171299 | 0.079445 | 0.086499 | 0.026236 | 0.083763 | 0.131073 | 0.073168 | 0.037615 | 0.203539 | 0.165613 | -0.006589 | 0.132952 | 0.145083 | 0.157116 | 0.195219 | -0.059278 | 0.001711 | -0.011425 | -0.007156 | 0.124377 | 0.129556 | -0.017307 | -0.099569 | 0.103656 | 0.108307 | 0.085892 | 0.172614 | -0.028632 | -0.071695 | 0.090077 | -0.089300 | 0.146203 | 0.097349 | -0.140179 | -0.026550 | -0.116107 | -0.137336 | 0.015655 | 0.039718 | 0.020868 | 0.013203 | 0.027992 | -0.012590 |
| Min | 0.141465 | 1.000000 | 0.874386 | 0.879344 | 0.883663 | 0.185107 | 0.682903 | 0.696960 | 0.211986 | 0.710354 | 0.708431 | 0.347905 | 0.386093 | 0.750189 | 0.688285 | 0.730612 | 0.786925 | 0.721082 | 0.379099 | 0.752922 | 0.918150 | 0.488944 | 0.194129 | 0.170033 | 0.432883 | 0.356788 | 0.121676 | 0.369311 | 0.184209 | 0.177022 | -0.137951 | 0.048167 | -0.013240 | -0.087250 | 0.220694 | 0.273186 | 0.414689 | -0.303390 | 0.428535 | 0.877030 | 0.649891 | 0.589629 | 0.555768 | 0.073892 | -0.044962 | 0.014108 | 0.177274 | -0.004497 | -0.062308 | -0.016905 | 0.018578 | -0.047885 | -0.115571 | 0.305266 | 0.026122 | 0.356889 | -0.181382 | 0.289378 |
| PTS | 0.106178 | 0.874386 | 1.000000 | 0.991753 | 0.982070 | 0.201940 | 0.702705 | 0.715586 | 0.218166 | 0.900180 | 0.890223 | 0.327889 | 0.304307 | 0.714216 | 0.635354 | 0.722161 | 0.852160 | 0.581842 | 0.327158 | 0.604013 | 0.952663 | 0.555874 | 0.272061 | 0.253677 | 0.385475 | 0.251745 | 0.148388 | 0.439515 | 0.072220 | 0.023455 | -0.137635 | 0.097341 | 0.022448 | -0.098331 | 0.210333 | 0.291327 | 0.715835 | -0.217161 | 0.596013 | 0.767651 | 0.506893 | 0.478508 | 0.413408 | 0.135628 | -0.114121 | 0.047683 | 0.248702 | -0.089468 | -0.039062 | 0.086858 | -0.016638 | -0.039999 | -0.197412 | 0.367293 | -0.058219 | 0.506853 | -0.315100 | 0.406371 |
| FGM | 0.093927 | 0.879344 | 0.991753 | 1.000000 | 0.977324 | 0.249524 | 0.659124 | 0.671131 | 0.191461 | 0.862378 | 0.861850 | 0.299364 | 0.360724 | 0.745424 | 0.676889 | 0.713027 | 0.850130 | 0.581773 | 0.364302 | 0.626573 | 0.959370 | 0.579334 | 0.278209 | 0.241080 | 0.382224 | 0.260198 | 0.140731 | 0.427103 | 0.063207 | 0.018972 | -0.090841 | 0.134623 | 0.069412 | -0.089439 | 0.236849 | 0.301693 | 0.698401 | -0.215185 | 0.603178 | 0.769704 | 0.513014 | 0.482467 | 0.420363 | 0.196702 | -0.174949 | 0.120044 | 0.255441 | -0.138863 | -0.037951 | 0.051201 | -0.015458 | 0.027996 | -0.174075 | 0.355391 | -0.064089 | 0.472211 | -0.311867 | 0.408323 |
| FGA | 0.087601 | 0.883663 | 0.982070 | 0.977324 | 1.000000 | 0.102137 | 0.745964 | 0.775336 | 0.217512 | 0.842125 | 0.829402 | 0.329343 | 0.241994 | 0.673827 | 0.584811 | 0.741067 | 0.849196 | 0.598588 | 0.269203 | 0.588238 | 0.930816 | 0.499863 | 0.236845 | 0.210371 | 0.367977 | 0.263714 | 0.127526 | 0.466512 | 0.102980 | 0.052466 | -0.203355 | 0.045574 | -0.046635 | -0.122794 | 0.120320 | 0.192273 | 0.721353 | -0.218987 | 0.525808 | 0.771661 | 0.506447 | 0.464970 | 0.427187 | 0.079130 | -0.055504 | 0.006700 | 0.279357 | -0.032357 | -0.022375 | 0.050566 | -0.011706 | -0.090487 | -0.241023 | 0.409555 | -0.029832 | 0.527010 | -0.337194 | 0.426410 |
| FG% | 0.069254 | 0.185107 | 0.201940 | 0.249524 | 0.102137 | 1.000000 | -0.111260 | -0.177823 | 0.043123 | 0.167621 | 0.213223 | 0.049772 | 0.480525 | 0.352392 | 0.420051 | 0.030367 | 0.152143 | 0.048938 | 0.372583 | 0.321116 | 0.258848 | 0.268598 | 0.086526 | 0.088961 | 0.186923 | 0.060094 | 0.109435 | -0.111962 | -0.155847 | -0.160476 | 0.437493 | 0.313422 | 0.438926 | 0.114673 | 0.916954 | 0.874155 | 0.004980 | 0.015055 | 0.485037 | 0.187085 | 0.230157 | 0.216416 | 0.188630 | 0.616131 | -0.549303 | 0.625311 | 0.048810 | -0.435936 | -0.013374 | -0.290526 | 0.044939 | 0.590949 | 0.374713 | -0.019165 | -0.121650 | -0.095382 | 0.138259 | 0.105023 |
| 3PM | 0.154838 | 0.682903 | 0.702705 | 0.659124 | 0.745964 | -0.111260 | 1.000000 | 0.981637 | 0.433537 | 0.467877 | 0.416197 | 0.350332 | -0.131135 | 0.314180 | 0.194871 | 0.532010 | 0.534407 | 0.456959 | -0.030592 | 0.361620 | 0.598519 | 0.124891 | 0.070578 | 0.199921 | 0.318373 | 0.209821 | 0.121546 | 0.301674 | 0.165752 | 0.062984 | -0.449947 | -0.214024 | -0.340266 | -0.179836 | 0.116529 | 0.161128 | 0.459120 | -0.176279 | 0.254609 | 0.594304 | 0.405484 | 0.404778 | 0.306983 | -0.427402 | 0.449799 | -0.444988 | 0.168861 | 0.473784 | 0.016379 | -0.110369 | -0.002219 | -0.491153 | -0.174183 | 0.295590 | 0.113869 | 0.463847 | -0.079265 | 0.150268 |
| 3PA | 0.136059 | 0.696960 | 0.715586 | 0.671131 | 0.775336 | -0.177823 | 0.981637 | 1.000000 | 0.367110 | 0.496895 | 0.446703 | 0.362414 | -0.137714 | 0.328019 | 0.203036 | 0.554115 | 0.560261 | 0.467127 | -0.031571 | 0.367323 | 0.612666 | 0.140196 | 0.080657 | 0.184224 | 0.313021 | 0.206793 | 0.119247 | 0.323465 | 0.167302 | 0.065453 | -0.468885 | -0.213678 | -0.349825 | -0.197065 | 0.027589 | 0.080920 | 0.498755 | -0.175473 | 0.235118 | 0.605785 | 0.406479 | 0.392165 | 0.322406 | -0.438002 | 0.462237 | -0.435696 | 0.176347 | 0.451638 | 0.027958 | -0.084307 | 0.000845 | -0.484758 | -0.201537 | 0.318662 | 0.119421 | 0.476755 | -0.109815 | 0.175289 |
| 3P% | 0.103236 | 0.211986 | 0.218166 | 0.191461 | 0.217512 | 0.043123 | 0.433537 | 0.367110 | 1.000000 | 0.117932 | 0.072982 | 0.248766 | -0.230716 | 0.022567 | -0.055375 | 0.175239 | 0.143592 | 0.165712 | -0.124196 | 0.089520 | 0.152959 | -0.061319 | 0.039103 | 0.059028 | 0.155744 | 0.086691 | 0.069012 | 0.134837 | 0.140851 | 0.101097 | -0.352908 | -0.163517 | -0.265248 | -0.121852 | 0.300090 | 0.324806 | 0.110282 | -0.138321 | 0.183809 | 0.182613 | 0.171067 | 0.174296 | 0.125709 | -0.323310 | 0.364654 | -0.401728 | 0.051840 | 0.503311 | -0.066224 | -0.246186 | 0.039068 | -0.408444 | -0.020505 | 0.107288 | 0.584347 | 0.241393 | 0.151911 | -0.023200 |
| FTM | 0.086528 | 0.710354 | 0.900180 | 0.862378 | 0.842125 | 0.167621 | 0.467877 | 0.496895 | 0.117932 | 1.000000 | 0.985847 | 0.311562 | 0.281225 | 0.630862 | 0.565132 | 0.647616 | 0.787414 | 0.482182 | 0.312064 | 0.492492 | 0.851447 | 0.560635 | 0.286735 | 0.247745 | 0.320940 | 0.175941 | 0.143852 | 0.429674 | 0.027996 | 0.010367 | -0.069928 | 0.126239 | 0.071258 | -0.050107 | 0.116767 | 0.245922 | 0.704996 | -0.183652 | 0.589804 | 0.631733 | 0.392619 | 0.366411 | 0.324762 | 0.219283 | -0.205254 | 0.082129 | 0.197742 | -0.224059 | -0.062077 | 0.285242 | -0.020497 | 0.011176 | -0.226396 | 0.339564 | -0.120076 | 0.493395 | -0.366770 | 0.426714 |
| FTA | 0.070279 | 0.708431 | 0.890223 | 0.861850 | 0.829402 | 0.213223 | 0.416197 | 0.446703 | 0.072982 | 0.985847 | 1.000000 | 0.249894 | 0.357792 | 0.669413 | 0.618707 | 0.628688 | 0.789800 | 0.472599 | 0.359102 | 0.514481 | 0.857341 | 0.598119 | 0.303343 | 0.219547 | 0.311326 | 0.201777 | 0.120652 | 0.411901 | 0.003208 | -0.007810 | 0.000765 | 0.173370 | 0.135861 | -0.034788 | 0.140846 | 0.251022 | 0.695193 | -0.178890 | 0.591538 | 0.626519 | 0.392780 | 0.361620 | 0.330223 | 0.281697 | -0.267137 | 0.143710 | 0.185288 | -0.277138 | -0.060697 | 0.276308 | -0.019945 | 0.075335 | -0.202141 | 0.323565 | -0.155252 | 0.469244 | -0.361292 | 0.426745 |
| FT% | 0.130108 | 0.347905 | 0.327889 | 0.299364 | 0.329343 | 0.049772 | 0.350332 | 0.362414 | 0.248766 | 0.311562 | 0.249894 | 1.000000 | -0.021217 | 0.179896 | 0.128425 | 0.246562 | 0.235274 | 0.211581 | 0.055422 | 0.229036 | 0.295579 | 0.079474 | 0.041074 | 0.133342 | 0.299523 | 0.164010 | 0.134460 | 0.124153 | 0.169846 | 0.025403 | -0.187339 | -0.098987 | -0.138531 | -0.139600 | 0.128982 | 0.286234 | 0.170044 | -0.183344 | 0.222807 | 0.343386 | 0.368069 | 0.330139 | 0.318860 | -0.113781 | 0.171839 | -0.132955 | 0.065192 | 0.103377 | -0.096920 | 0.041051 | 0.229101 | -0.151972 | 0.100670 | 0.195130 | 0.256770 | 0.175276 | 0.062292 | 0.088549 |
| OREB | -0.010731 | 0.386093 | 0.304307 | 0.360724 | 0.241994 | 0.480525 | -0.131135 | -0.137714 | -0.230716 | 0.281225 | 0.357792 | -0.021217 | 1.000000 | 0.684341 | 0.834514 | 0.093881 | 0.293349 | 0.195129 | 0.638469 | 0.561199 | 0.466433 | 0.592067 | 0.154833 | 0.080565 | 0.223772 | 0.207250 | 0.048879 | -0.106940 | -0.192423 | -0.156650 | 0.748574 | 0.535720 | 0.711123 | 0.069398 | 0.310442 | 0.289777 | 0.034326 | -0.159435 | 0.350115 | 0.358589 | 0.325323 | 0.273328 | 0.301741 | 0.547570 | -0.532101 | 0.488431 | -0.137768 | -0.486455 | -0.180319 | 0.056030 | -0.027428 | 0.522477 | 0.247337 | -0.134427 | -0.181373 | -0.179885 | 0.048466 | -0.006011 |
| DREB | 0.115184 | 0.750189 | 0.714216 | 0.745424 | 0.673827 | 0.352392 | 0.314180 | 0.328019 | 0.022567 | 0.630862 | 0.669413 | 0.179896 | 0.684341 | 1.000000 | 0.972541 | 0.499720 | 0.679953 | 0.459379 | 0.593703 | 0.709498 | 0.837013 | 0.783909 | 0.341385 | 0.219016 | 0.339764 | 0.228962 | 0.126481 | 0.212342 | -0.020477 | 0.018034 | 0.242586 | 0.580617 | 0.530536 | 0.017758 | 0.283055 | 0.311139 | 0.370118 | -0.218660 | 0.573347 | 0.673426 | 0.506842 | 0.476880 | 0.415071 | 0.315229 | -0.296860 | 0.254667 | 0.039589 | -0.260779 | -0.131785 | 0.043063 | -0.057218 | 0.233679 | 0.076586 | 0.074334 | -0.061567 | 0.115015 | -0.069850 | 0.144197 |
| REB | 0.083177 | 0.688285 | 0.635354 | 0.676889 | 0.584811 | 0.420051 | 0.194871 | 0.203036 | -0.055375 | 0.565132 | 0.618707 | 0.128425 | 0.834514 | 0.972541 | 1.000000 | 0.406373 | 0.605686 | 0.407788 | 0.652016 | 0.713829 | 0.779791 | 0.780760 | 0.307263 | 0.191439 | 0.327864 | 0.237605 | 0.111889 | 0.125366 | -0.077162 | -0.037244 | 0.422077 | 0.609204 | 0.627467 | 0.035060 | 0.313507 | 0.328119 | 0.290217 | -0.215727 | 0.544821 | 0.622193 | 0.486234 | 0.447302 | 0.409177 | 0.412976 | -0.394173 | 0.348680 | -0.013234 | -0.352234 | -0.156709 | 0.049625 | -0.053086 | 0.343391 | 0.137180 | 0.013162 | -0.102779 | 0.029368 | -0.036760 | 0.106847 |
| AST | 0.171299 | 0.730612 | 0.722161 | 0.713027 | 0.741067 | 0.030367 | 0.532010 | 0.554115 | 0.175239 | 0.647616 | 0.628688 | 0.246562 | 0.093881 | 0.499720 | 0.406373 | 1.000000 | 0.831550 | 0.652683 | 0.090063 | 0.463791 | 0.788550 | 0.469106 | 0.387921 | 0.181743 | 0.293109 | 0.196372 | 0.109925 | 0.828521 | 0.414712 | 0.570004 | -0.260903 | -0.040228 | -0.134178 | -0.016668 | 0.037963 | 0.100635 | 0.494615 | -0.162441 | 0.446034 | 0.576156 | 0.340179 | 0.336295 | 0.261091 | 0.066875 | -0.049294 | -0.003249 | 0.222678 | -0.024176 | -0.031898 | 0.052775 | -0.021240 | -0.080517 | -0.425670 | 0.523654 | -0.098820 | 0.576870 | -0.467892 | 0.546260 |
| TOV | 0.079445 | 0.786925 | 0.852160 | 0.850130 | 0.849196 | 0.152143 | 0.534407 | 0.560261 | 0.143592 | 0.787414 | 0.789800 | 0.235274 | 0.293349 | 0.679953 | 0.605686 | 0.831550 | 1.000000 | 0.567522 | 0.276845 | 0.638223 | 0.868854 | 0.559351 | 0.335870 | 0.137193 | 0.278104 | 0.260382 | 0.059391 | 0.617437 | 0.052111 | 0.237711 | -0.101675 | 0.131922 | 0.059715 | 0.217613 | 0.116198 | 0.181782 | 0.654152 | -0.147383 | 0.469491 | 0.646323 | 0.404578 | 0.358509 | 0.355206 | 0.195102 | -0.194467 | 0.112423 | 0.178602 | -0.166132 | -0.069403 | 0.117911 | -0.044408 | 0.047135 | -0.302428 | 0.427283 | -0.125464 | 0.522398 | -0.425212 | 0.500964 |
| STL | 0.086499 | 0.721082 | 0.581842 | 0.581773 | 0.598588 | 0.048938 | 0.456959 | 0.467127 | 0.165712 | 0.482182 | 0.472599 | 0.211581 | 0.195129 | 0.459379 | 0.407788 | 0.652683 | 0.567522 | 1.000000 | 0.201542 | 0.530113 | 0.684821 | 0.283126 | 0.167104 | 0.180937 | 0.344651 | 0.158133 | 0.172883 | 0.398148 | 0.276100 | 0.351376 | -0.165546 | -0.053282 | -0.105540 | -0.059931 | 0.073198 | 0.130559 | 0.257482 | -0.215612 | 0.341881 | 0.596023 | 0.414046 | 0.378905 | 0.350574 | 0.035664 | -0.010075 | -0.024276 | 0.107104 | 0.018904 | 0.009756 | 0.007439 | 0.139936 | -0.060910 | -0.260259 | 0.324435 | -0.014121 | 0.346934 | -0.266962 | 0.325863 |
| BLK | 0.026236 | 0.379099 | 0.327158 | 0.364302 | 0.269203 | 0.372583 | -0.030592 | -0.031571 | -0.124196 | 0.312064 | 0.359102 | 0.055422 | 0.638469 | 0.593703 | 0.652016 | 0.090063 | 0.276845 | 0.201542 | 1.000000 | 0.520855 | 0.471399 | 0.443288 | 0.061188 | 0.115206 | 0.212560 | 0.120279 | 0.092874 | -0.092911 | -0.136011 | -0.155361 | 0.391706 | 0.380875 | 0.454647 | 0.029283 | 0.252134 | 0.269752 | 0.108259 | -0.137686 | 0.342870 | 0.359320 | 0.310445 | 0.288612 | 0.257987 | 0.388163 | -0.374269 | 0.351937 | -0.047344 | -0.357623 | -0.170207 | 0.054157 | -0.034144 | 0.358421 | 0.223246 | -0.124426 | -0.086044 | -0.107780 | 0.068285 | -0.038397 |
| PF | 0.083763 | 0.752922 | 0.604013 | 0.626573 | 0.588238 | 0.321116 | 0.361620 | 0.367323 | 0.089520 | 0.492492 | 0.514481 | 0.229036 | 0.561199 | 0.709498 | 0.713829 | 0.463791 | 0.638223 | 0.530113 | 0.520855 | 1.000000 | 0.703732 | 0.427184 | 0.136517 | 0.095060 | 0.348555 | 0.316532 | 0.080074 | 0.164109 | 0.001248 | 0.081502 | 0.166965 | 0.227263 | 0.247239 | 0.108332 | 0.288200 | 0.313451 | 0.214853 | -0.187442 | 0.269976 | 0.631287 | 0.514637 | 0.433790 | 0.475816 | 0.244750 | -0.221258 | 0.199666 | -0.013112 | -0.170082 | -0.143138 | -0.032886 | 0.005814 | 0.198407 | 0.097830 | 0.103337 | 0.047883 | 0.071721 | -0.023693 | 0.125993 |
| FP | 0.131073 | 0.918150 | 0.952663 | 0.959370 | 0.930816 | 0.258848 | 0.598519 | 0.612666 | 0.152959 | 0.851447 | 0.857341 | 0.295579 | 0.466433 | 0.837013 | 0.779791 | 0.788550 | 0.868854 | 0.684821 | 0.471399 | 0.703732 | 1.000000 | 0.665397 | 0.329808 | 0.260219 | 0.415570 | 0.264402 | 0.164159 | 0.482623 | 0.138206 | 0.150552 | -0.014777 | 0.217182 | 0.161261 | -0.072387 | 0.233156 | 0.302889 | 0.604438 | -0.246247 | 0.628636 | 0.796185 | 0.545229 | 0.515402 | 0.443913 | 0.224107 | -0.198628 | 0.135543 | 0.192292 | -0.165128 | -0.079522 | 0.073973 | -0.014394 | 0.064932 | -0.172276 | 0.340305 | -0.084370 | 0.436525 | -0.300949 | 0.394651 |
| DD2 | 0.073168 | 0.488944 | 0.555874 | 0.579334 | 0.499863 | 0.268598 | 0.124891 | 0.140196 | -0.061319 | 0.560635 | 0.598119 | 0.079474 | 0.592067 | 0.783909 | 0.780760 | 0.469106 | 0.559351 | 0.283126 | 0.443288 | 0.427184 | 0.665397 | 1.000000 | 0.514980 | 0.232090 | 0.223949 | 0.104231 | 0.111582 | 0.289120 | -0.000653 | 0.044902 | 0.250452 | 0.459809 | 0.446507 | 0.016137 | 0.175619 | 0.207475 | 0.368663 | -0.157062 | 0.506514 | 0.504705 | 0.350709 | 0.352824 | 0.262576 | 0.310922 | -0.305532 | 0.239307 | 0.035567 | -0.280752 | -0.129537 | 0.109683 | -0.077705 | 0.220153 | -0.036590 | 0.101178 | -0.148219 | 0.165092 | -0.164574 | 0.199610 |
| TD3 | 0.037615 | 0.194129 | 0.272061 | 0.278209 | 0.236845 | 0.086526 | 0.070578 | 0.080657 | 0.039103 | 0.286735 | 0.303343 | 0.041074 | 0.154833 | 0.341385 | 0.307263 | 0.387921 | 0.335870 | 0.167104 | 0.061188 | 0.136517 | 0.329808 | 0.514980 | 1.000000 | 0.181596 | 0.118975 | 0.032664 | 0.072843 | 0.321799 | 0.080508 | 0.151314 | 0.033172 | 0.190450 | 0.155771 | 0.014352 | 0.059209 | 0.077643 | 0.217692 | -0.045190 | 0.265024 | 0.187187 | 0.106959 | 0.146937 | 0.037674 | 0.093627 | -0.091947 | 0.062766 | 0.011781 | -0.087133 | -0.038321 | 0.054935 | -0.021254 | 0.056841 | -0.079160 | 0.101356 | -0.039992 | 0.156454 | -0.127723 | 0.142270 |
| +/- | 0.203539 | 0.170033 | 0.253677 | 0.241080 | 0.210371 | 0.088961 | 0.199921 | 0.184224 | 0.059028 | 0.247745 | 0.219547 | 0.133342 | 0.080565 | 0.219016 | 0.191439 | 0.181743 | 0.137193 | 0.180937 | 0.115206 | 0.095060 | 0.260219 | 0.232090 | 0.181596 | 1.000000 | 0.450487 | -0.515240 | 0.663678 | 0.093910 | 0.076671 | 0.059214 | -0.000177 | 0.047203 | 0.058232 | -0.113271 | 0.123898 | 0.174803 | 0.168016 | -0.120899 | 0.305343 | 0.276027 | 0.231111 | 0.442510 | -0.053380 | -0.033441 | 0.050389 | -0.042718 | -0.101177 | 0.020576 | 0.080358 | 0.037694 | -0.003015 | -0.006536 | -0.031007 | 0.072220 | -0.021278 | 0.136110 | -0.055969 | 0.065693 |
| OFFRTG | 0.165613 | 0.432883 | 0.385475 | 0.382224 | 0.367977 | 0.186923 | 0.318373 | 0.313021 | 0.155744 | 0.320940 | 0.311326 | 0.299523 | 0.223772 | 0.339764 | 0.327864 | 0.293109 | 0.278104 | 0.344651 | 0.212560 | 0.348555 | 0.415570 | 0.223949 | 0.118975 | 0.450487 | 1.000000 | -0.025445 | 0.795575 | 0.073945 | 0.053480 | 0.070264 | -0.007090 | -0.004201 | 0.014206 | -0.180783 | 0.226595 | 0.330474 | 0.260694 | -0.123759 | 0.337983 | 0.408410 | 0.373148 | 0.391776 | 0.261719 | 0.059041 | 0.059130 | 0.017087 | -0.001807 | -0.018886 | -0.059495 | 0.005583 | 0.022224 | 0.017162 | 0.008063 | 0.118516 | 0.027437 | 0.120799 | -0.063968 | 0.097334 |
| DEFRTG | -0.006589 | 0.356788 | 0.251745 | 0.260198 | 0.263714 | 0.060094 | 0.209821 | 0.206793 | 0.086691 | 0.175941 | 0.201777 | 0.164010 | 0.207250 | 0.228962 | 0.237605 | 0.196372 | 0.260382 | 0.158133 | 0.120279 | 0.316532 | 0.264402 | 0.104231 | 0.032664 | -0.515240 | -0.025445 | 1.000000 | -0.625889 | 0.094269 | 0.053942 | 0.049661 | 0.077211 | 0.047718 | 0.045925 | 0.126758 | 0.056873 | 0.012624 | 0.004033 | -0.151833 | -0.109561 | 0.247599 | 0.244337 | 0.115898 | 0.322996 | 0.064602 | -0.080261 | 0.010741 | 0.094712 | -0.023834 | -0.273729 | 0.026700 | 0.001298 | -0.022421 | 0.031791 | 0.054166 | 0.045775 | 0.082255 | -0.014693 | 0.029996 |
| NETRTG | 0.132952 | 0.121676 | 0.148388 | 0.140731 | 0.127526 | 0.109435 | 0.121546 | 0.119247 | 0.069012 | 0.143852 | 0.120652 | 0.134460 | 0.048879 | 0.126481 | 0.111889 | 0.109925 | 0.059391 | 0.172883 | 0.092874 | 0.080074 | 0.164159 | 0.111582 | 0.072843 | 0.663678 | 0.795575 | -0.625889 | 1.000000 | 0.000708 | 0.009141 | 0.024777 | -0.052475 | -0.032109 | -0.016763 | -0.217629 | 0.142332 | 0.250250 | 0.201123 | -0.004644 | 0.330261 | 0.168756 | 0.143224 | 0.235503 | 0.008674 | 0.006794 | 0.094816 | 0.006822 | -0.058782 | -0.000317 | 0.119638 | -0.011774 | 0.016400 | 0.026968 | -0.013288 | 0.059837 | -0.006123 | 0.044441 | -0.041133 | 0.057901 |
| AST% | 0.145083 | 0.369311 | 0.439515 | 0.427103 | 0.466512 | -0.111962 | 0.301674 | 0.323465 | 0.134837 | 0.429674 | 0.411901 | 0.124153 | -0.106940 | 0.212342 | 0.125366 | 0.828521 | 0.617437 | 0.398148 | -0.092911 | 0.164109 | 0.482623 | 0.289120 | 0.321799 | 0.093910 | 0.073945 | 0.094269 | 0.000708 | 1.000000 | 0.523037 | 0.761986 | -0.287646 | -0.105001 | -0.199642 | 0.030295 | -0.115420 | -0.074478 | 0.447669 | -0.145246 | 0.318568 | 0.245004 | 0.084290 | 0.100258 | 0.046441 | 0.056102 | -0.029798 | -0.018400 | 0.231344 | 0.007992 | -0.030535 | 0.017924 | -0.023052 | -0.098144 | -0.531034 | 0.566061 | -0.081342 | 0.564587 | -0.515915 | 0.597266 |
| AST/TO | 0.157116 | 0.184209 | 0.072220 | 0.063207 | 0.102980 | -0.155847 | 0.165752 | 0.167302 | 0.140851 | 0.027996 | 0.003208 | 0.169846 | -0.192423 | -0.020477 | -0.077162 | 0.414712 | 0.052111 | 0.276100 | -0.136011 | 0.001248 | 0.138206 | -0.000653 | 0.080508 | 0.076671 | 0.053480 | 0.053942 | 0.009141 | 0.523037 | 1.000000 | 0.674596 | -0.332908 | -0.256875 | -0.322073 | -0.223700 | -0.075759 | -0.068249 | -0.105039 | -0.094665 | 0.043066 | 0.118655 | 0.094866 | 0.126180 | 0.037883 | -0.179552 | 0.208245 | -0.167999 | 0.103530 | 0.248542 | 0.022469 | -0.176749 | 0.095988 | -0.199121 | -0.206284 | 0.278951 | 0.129007 | 0.241027 | -0.118755 | 0.220256 |
| AST RATIO | 0.195219 | 0.177022 | 0.023455 | 0.018972 | 0.052466 | -0.160476 | 0.062984 | 0.065453 | 0.101097 | 0.010367 | -0.007810 | 0.025403 | -0.156650 | 0.018034 | -0.037244 | 0.570004 | 0.237711 | 0.351376 | -0.155361 | 0.081502 | 0.150552 | 0.044902 | 0.151314 | 0.059214 | 0.070264 | 0.049661 | 0.024777 | 0.761986 | 0.674596 | 1.000000 | -0.257821 | -0.187905 | -0.244955 | 0.009624 | -0.118594 | -0.125127 | -0.125611 | -0.103250 | 0.026637 | 0.076024 | 0.036676 | 0.063449 | -0.001167 | -0.073228 | 0.109251 | -0.103968 | 0.046679 | 0.158414 | -0.061983 | -0.118304 | 0.022575 | -0.117218 | -0.375948 | 0.323669 | 0.041878 | 0.244618 | -0.234344 | 0.301080 |
| OREB% | -0.059278 | -0.137951 | -0.137635 | -0.090841 | -0.203355 | 0.437493 | -0.449947 | -0.468885 | -0.352908 | -0.069928 | 0.000765 | -0.187339 | 0.748574 | 0.242586 | 0.422077 | -0.260903 | -0.101675 | -0.165546 | 0.391706 | 0.166965 | -0.014777 | 0.250452 | 0.033172 | -0.000177 | -0.007090 | 0.077211 | -0.052475 | -0.287646 | -0.332908 | -0.257821 | 1.000000 | 0.521086 | 0.806027 | 0.167191 | 0.224621 | 0.161376 | -0.160629 | -0.035100 | 0.120321 | -0.095092 | -0.002859 | -0.015595 | 0.011572 | 0.609756 | -0.592998 | 0.570164 | -0.234154 | -0.564996 | -0.076489 | 0.059860 | -0.023574 | 0.635393 | 0.323206 | -0.269522 | -0.258817 | -0.325879 | 0.119056 | -0.109940 |
| DREB% | 0.001711 | 0.048167 | 0.097341 | 0.134623 | 0.045574 | 0.313422 | -0.214024 | -0.213678 | -0.163517 | 0.126239 | 0.173370 | -0.098987 | 0.535720 | 0.580617 | 0.609204 | -0.040228 | 0.131922 | -0.053282 | 0.380875 | 0.227263 | 0.217182 | 0.459809 | 0.190450 | 0.047203 | -0.004201 | 0.047718 | -0.032109 | -0.105001 | -0.256875 | -0.187905 | 0.521086 | 1.000000 | 0.919335 | 0.147905 | 0.175132 | 0.143923 | 0.104302 | 0.005398 | 0.405442 | 0.060621 | 0.066473 | 0.063275 | 0.053647 | 0.391610 | -0.392438 | 0.377686 | -0.140199 | -0.383610 | -0.066681 | 0.057776 | -0.075679 | 0.415706 | 0.243443 | -0.174812 | -0.122456 | -0.203483 | 0.076435 | -0.070089 |
| REB% | -0.011425 | -0.013240 | 0.022448 | 0.069412 | -0.046635 | 0.438926 | -0.340266 | -0.349825 | -0.265248 | 0.071258 | 0.135861 | -0.138531 | 0.711123 | 0.530536 | 0.627467 | -0.134178 | 0.059715 | -0.105540 | 0.454647 | 0.247239 | 0.161261 | 0.446507 | 0.155771 | 0.058232 | 0.014206 | 0.045925 | -0.016763 | -0.199642 | -0.322073 | -0.244955 | 0.806027 | 0.919335 | 1.000000 | 0.174081 | 0.250060 | 0.201288 | 0.005275 | 0.001926 | 0.357209 | 0.016133 | 0.061804 | 0.058817 | 0.049895 | 0.549176 | -0.539655 | 0.527960 | -0.194199 | -0.519679 | -0.054421 | 0.048651 | -0.066231 | 0.580501 | 0.331210 | -0.242546 | -0.195807 | -0.287006 | 0.120907 | -0.098675 |
| TO RATIO | -0.007156 | -0.087250 | -0.098331 | -0.089439 | -0.122794 | 0.114673 | -0.179836 | -0.197065 | -0.121852 | -0.050107 | -0.034788 | -0.139600 | 0.069398 | 0.017758 | 0.035060 | -0.016668 | 0.217613 | -0.059931 | 0.029283 | 0.108332 | -0.072387 | 0.016137 | 0.014352 | -0.113271 | -0.180783 | 0.126758 | -0.217629 | 0.030295 | -0.223700 | 0.009624 | 0.167191 | 0.147905 | 0.174081 | 1.000000 | 0.023590 | -0.009359 | -0.017609 | 0.122754 | -0.258277 | -0.098678 | -0.082968 | -0.093460 | -0.051346 | 0.178528 | -0.291301 | 0.198585 | -0.141550 | -0.255983 | -0.076365 | 0.135623 | -0.058221 | 0.242068 | -0.081048 | 0.037615 | -0.208840 | 0.000252 | -0.196610 | 0.143277 |
| EFG% | 0.124377 | 0.220694 | 0.210333 | 0.236849 | 0.120320 | 0.916954 | 0.116529 | 0.027589 | 0.300090 | 0.116767 | 0.140846 | 0.128982 | 0.310442 | 0.283055 | 0.313507 | 0.037963 | 0.116198 | 0.073198 | 0.252134 | 0.288200 | 0.233156 | 0.175619 | 0.059209 | 0.123898 | 0.226595 | 0.056873 | 0.142332 | -0.115420 | -0.075759 | -0.118594 | 0.224621 | 0.175132 | 0.250060 | 0.023590 | 1.000000 | 0.954197 | -0.027782 | -0.012887 | 0.462199 | 0.215968 | 0.262188 | 0.266703 | 0.193137 | 0.284769 | -0.202172 | 0.301873 | 0.024681 | -0.059407 | -0.016516 | -0.433099 | 0.040327 | 0.284937 | 0.392743 | -0.048286 | 0.045743 | -0.044819 | 0.303271 | -0.021534 |
| TS% | 0.129556 | 0.273186 | 0.291327 | 0.301693 | 0.192273 | 0.874155 | 0.161128 | 0.080920 | 0.324806 | 0.245922 | 0.251022 | 0.286234 | 0.289777 | 0.311139 | 0.328119 | 0.100635 | 0.181782 | 0.130559 | 0.269752 | 0.313451 | 0.302889 | 0.207475 | 0.077643 | 0.174803 | 0.330474 | 0.012624 | 0.250250 | -0.074478 | -0.068249 | -0.125127 | 0.161376 | 0.143923 | 0.201288 | -0.009359 | 0.954197 | 1.000000 | 0.095745 | -0.034267 | 0.573614 | 0.268354 | 0.292533 | 0.290523 | 0.223088 | 0.259361 | -0.169281 | 0.248755 | 0.036528 | -0.082956 | -0.052410 | -0.290967 | 0.047253 | 0.229186 | 0.337876 | -0.010654 | 0.059558 | 0.012918 | 0.239762 | 0.019660 |
| USG% | -0.017307 | 0.414689 | 0.715835 | 0.698401 | 0.721353 | 0.004980 | 0.459120 | 0.498755 | 0.110282 | 0.704996 | 0.695193 | 0.170044 | 0.034326 | 0.370118 | 0.290217 | 0.494615 | 0.654152 | 0.257482 | 0.108259 | 0.214853 | 0.604438 | 0.368663 | 0.217692 | 0.168016 | 0.260694 | 0.004033 | 0.201123 | 0.447669 | -0.105039 | -0.125611 | -0.160629 | 0.104302 | 0.005275 | -0.017609 | -0.027782 | 0.095745 | 1.000000 | -0.056733 | 0.539191 | 0.357500 | 0.146066 | 0.140026 | 0.116820 | 0.109779 | -0.078586 | 0.049148 | 0.266322 | -0.136954 | 0.068467 | 0.176264 | -0.105585 | -0.044664 | -0.336992 | 0.398588 | -0.136873 | 0.528714 | -0.453046 | 0.472688 |
| PACE | -0.099569 | -0.303390 | -0.217161 | -0.215185 | -0.218987 | 0.015055 | -0.176279 | -0.175473 | -0.138321 | -0.183652 | -0.178890 | -0.183344 | -0.159435 | -0.218660 | -0.215727 | -0.162441 | -0.147383 | -0.215612 | -0.137686 | -0.187442 | -0.246247 | -0.157062 | -0.045190 | -0.120899 | -0.123759 | -0.151833 | -0.004644 | -0.145246 | -0.094665 | -0.103250 | -0.035100 | 0.005398 | 0.001926 | 0.122754 | -0.012887 | -0.034267 | -0.056733 | 1.000000 | -0.191414 | -0.299818 | -0.281333 | -0.288500 | -0.204735 | 0.009373 | 0.004254 | 0.046714 | -0.054686 | -0.043545 | 0.206638 | -0.000420 | 0.012979 | 0.064373 | 0.049072 | -0.086627 | -0.067789 | -0.118438 | 0.030164 | -0.048140 |
| PIE | 0.103656 | 0.428535 | 0.596013 | 0.603178 | 0.525808 | 0.485037 | 0.254609 | 0.235118 | 0.183809 | 0.589804 | 0.591538 | 0.222807 | 0.350115 | 0.573347 | 0.544821 | 0.446034 | 0.469491 | 0.341881 | 0.342870 | 0.269976 | 0.628636 | 0.506514 | 0.265024 | 0.305343 | 0.337983 | -0.109561 | 0.330261 | 0.318568 | 0.043066 | 0.026637 | 0.120321 | 0.405442 | 0.357209 | -0.258277 | 0.462199 | 0.573614 | 0.539191 | -0.191414 | 1.000000 | 0.388253 | 0.268749 | 0.282877 | 0.187728 | 0.286808 | -0.230211 | 0.224409 | 0.237045 | -0.226035 | 0.004625 | 0.030655 | -0.023429 | 0.135928 | -0.018838 | 0.179143 | -0.067677 | 0.266234 | -0.148897 | 0.259167 |
| POSS | 0.108307 | 0.877030 | 0.767651 | 0.769704 | 0.771661 | 0.187085 | 0.594304 | 0.605785 | 0.182613 | 0.631733 | 0.626519 | 0.343386 | 0.358589 | 0.673426 | 0.622193 | 0.576156 | 0.646323 | 0.596023 | 0.359320 | 0.631287 | 0.796185 | 0.504705 | 0.187187 | 0.276027 | 0.408410 | 0.247599 | 0.168756 | 0.245004 | 0.118655 | 0.076024 | -0.095092 | 0.060621 | 0.016133 | -0.098678 | 0.215968 | 0.268354 | 0.357500 | -0.299818 | 0.388253 | 1.000000 | 0.880536 | 0.798682 | 0.753230 | 0.081859 | -0.059853 | 0.019601 | 0.112073 | -0.021104 | -0.016321 | 0.005316 | 0.002416 | -0.019952 | -0.065315 | 0.249614 | 0.026623 | 0.287573 | -0.144785 | 0.230703 |
| GP | 0.085892 | 0.649891 | 0.506893 | 0.513014 | 0.506447 | 0.230157 | 0.405484 | 0.406479 | 0.171067 | 0.392619 | 0.392780 | 0.368069 | 0.325323 | 0.506842 | 0.486234 | 0.340179 | 0.404578 | 0.414046 | 0.310445 | 0.514637 | 0.545229 | 0.350709 | 0.106959 | 0.231111 | 0.373148 | 0.244337 | 0.143224 | 0.084290 | 0.094866 | 0.036676 | -0.002859 | 0.066473 | 0.061804 | -0.082968 | 0.262188 | 0.292533 | 0.146066 | -0.281333 | 0.268749 | 0.880536 | 1.000000 | 0.891308 | 0.872384 | 0.089531 | -0.057966 | 0.054087 | 0.013879 | -0.007669 | -0.031626 | -0.083367 | 0.065297 | 0.047647 | 0.093275 | 0.152519 | 0.119015 | 0.137737 | 0.005317 | 0.105739 |
| W | 0.172614 | 0.589629 | 0.478508 | 0.482467 | 0.464970 | 0.216416 | 0.404778 | 0.392165 | 0.174296 | 0.366411 | 0.361620 | 0.330139 | 0.273328 | 0.476880 | 0.447302 | 0.336295 | 0.358509 | 0.378905 | 0.288612 | 0.433790 | 0.515402 | 0.352824 | 0.146937 | 0.442510 | 0.391776 | 0.115898 | 0.235503 | 0.100258 | 0.126180 | 0.063449 | -0.015595 | 0.063275 | 0.058817 | -0.093460 | 0.266703 | 0.290523 | 0.140026 | -0.288500 | 0.282877 | 0.798682 | 0.891308 | 1.000000 | 0.555932 | 0.040164 | -0.013641 | 0.014367 | 0.016147 | 0.033421 | -0.013747 | -0.090891 | 0.044638 | 0.008259 | 0.090901 | 0.121253 | 0.097203 | 0.148845 | 0.017888 | 0.075885 |
| L | -0.028632 | 0.555768 | 0.413408 | 0.420363 | 0.427187 | 0.188630 | 0.306983 | 0.322406 | 0.125709 | 0.324762 | 0.330223 | 0.318860 | 0.301741 | 0.415071 | 0.409177 | 0.261091 | 0.355206 | 0.350574 | 0.257987 | 0.475816 | 0.443913 | 0.262576 | 0.037674 | -0.053380 | 0.261719 | 0.322996 | 0.008674 | 0.046441 | 0.037883 | -0.001167 | 0.011572 | 0.053647 | 0.049895 | -0.051346 | 0.193137 | 0.223088 | 0.116820 | -0.204735 | 0.187728 | 0.753230 | 0.872384 | 0.555932 | 1.000000 | 0.120838 | -0.091564 | 0.083670 | 0.008036 | -0.050093 | -0.043160 | -0.054846 | 0.071585 | 0.078449 | 0.073001 | 0.148891 | 0.113396 | 0.092042 | -0.009538 | 0.112041 |
| %FGA2PT | -0.071695 | 0.073892 | 0.135628 | 0.196702 | 0.079130 | 0.616131 | -0.427402 | -0.438002 | -0.323310 | 0.219283 | 0.281697 | -0.113781 | 0.547570 | 0.315229 | 0.412976 | 0.066875 | 0.195102 | 0.035664 | 0.388163 | 0.244750 | 0.224107 | 0.310922 | 0.093627 | -0.033441 | 0.059041 | 0.064602 | 0.006794 | 0.056102 | -0.179552 | -0.073228 | 0.609756 | 0.391610 | 0.549176 | 0.178528 | 0.284769 | 0.259361 | 0.109779 | 0.009373 | 0.286808 | 0.081859 | 0.089531 | 0.040164 | 0.120838 | 1.000000 | -0.961983 | 0.906288 | 0.107542 | -0.883348 | -0.013812 | 0.066598 | 0.058467 | 0.843660 | 0.093234 | 0.133680 | -0.342752 | -0.061382 | -0.282070 | 0.380810 |
| %FGA3PT | 0.090077 | -0.044962 | -0.114121 | -0.174949 | -0.055504 | -0.549303 | 0.449799 | 0.462237 | 0.364654 | -0.205254 | -0.267137 | 0.171839 | -0.532101 | -0.296860 | -0.394173 | -0.049294 | -0.194467 | -0.010075 | -0.374269 | -0.221258 | -0.198628 | -0.305532 | -0.091947 | 0.050389 | 0.059130 | -0.080261 | 0.094816 | -0.029798 | 0.208245 | 0.109251 | -0.592998 | -0.392438 | -0.539655 | -0.291301 | -0.202172 | -0.169281 | -0.078586 | 0.004254 | -0.230211 | -0.059853 | -0.057966 | -0.013641 | -0.091564 | -0.961983 | 1.000000 | -0.871742 | -0.094031 | 0.915876 | 0.034851 | -0.192410 | -0.022665 | -0.814764 | -0.052922 | -0.100613 | 0.392324 | 0.073826 | 0.340071 | -0.353513 |
| %PTS2PT | -0.089300 | 0.014108 | 0.047683 | 0.120044 | 0.006700 | 0.625311 | -0.444988 | -0.435696 | -0.401728 | 0.082129 | 0.143710 | -0.132955 | 0.488431 | 0.254667 | 0.348680 | -0.003249 | 0.112423 | -0.024276 | 0.351937 | 0.199666 | 0.135543 | 0.239307 | 0.062766 | -0.042718 | 0.017087 | 0.010741 | 0.006822 | -0.018400 | -0.167999 | -0.103968 | 0.570164 | 0.377686 | 0.527960 | 0.198585 | 0.301873 | 0.248755 | 0.049148 | 0.046714 | 0.224409 | 0.019601 | 0.054087 | 0.014367 | 0.083670 | 0.906288 | -0.871742 | 1.000000 | 0.096270 | -0.860144 | 0.122309 | -0.148595 | 0.112691 | 0.938638 | 0.192745 | 0.139458 | -0.268679 | -0.145979 | -0.209775 | 0.364627 |
| %PTS2PT MR | 0.146203 | 0.177274 | 0.248702 | 0.255441 | 0.279357 | 0.048810 | 0.168861 | 0.176347 | 0.051840 | 0.197742 | 0.185288 | 0.065192 | -0.137768 | 0.039589 | -0.013234 | 0.222678 | 0.178602 | 0.107104 | -0.047344 | -0.013112 | 0.192292 | 0.035567 | 0.011781 | -0.101177 | -0.001807 | 0.094712 | -0.058782 | 0.231344 | 0.103530 | 0.046679 | -0.234154 | -0.140199 | -0.194199 | -0.141550 | 0.024681 | 0.036528 | 0.266322 | -0.054686 | 0.237045 | 0.112073 | 0.013879 | 0.016147 | 0.008036 | 0.107542 | -0.094031 | 0.096270 | 1.000000 | -0.063605 | -0.001577 | -0.051532 | 0.043756 | -0.252933 | -0.172408 | 0.295634 | 0.001683 | 0.211830 | -0.236185 | 0.297817 |
| %PTS3PT | 0.097349 | -0.004497 | -0.089468 | -0.138863 | -0.032357 | -0.435936 | 0.473784 | 0.451638 | 0.503311 | -0.224059 | -0.277138 | 0.103377 | -0.486455 | -0.260779 | -0.352234 | -0.024176 | -0.166132 | 0.018904 | -0.357623 | -0.170082 | -0.165128 | -0.280752 | -0.087133 | 0.020576 | -0.018886 | -0.023834 | -0.000317 | 0.007992 | 0.248542 | 0.158414 | -0.564996 | -0.383610 | -0.519679 | -0.255983 | -0.059407 | -0.082956 | -0.136954 | -0.043545 | -0.226035 | -0.021104 | -0.007669 | 0.033421 | -0.050093 | -0.883348 | 0.915876 | -0.860144 | -0.063605 | 1.000000 | -0.026264 | -0.376576 | -0.038917 | -0.813998 | -0.046794 | -0.080777 | 0.421201 | 0.097277 | 0.410379 | -0.339708 |
| %PTSFBPs | -0.140179 | -0.062308 | -0.039062 | -0.037951 | -0.022375 | -0.013374 | 0.016379 | 0.027958 | -0.066224 | -0.062077 | -0.060697 | -0.096920 | -0.180319 | -0.131785 | -0.156709 | -0.031898 | -0.069403 | 0.009756 | -0.170207 | -0.143138 | -0.079522 | -0.129537 | -0.038321 | 0.080358 | -0.059495 | -0.273729 | 0.119638 | -0.030535 | 0.022469 | -0.061983 | -0.076489 | -0.066681 | -0.054421 | -0.076365 | -0.016516 | -0.052410 | 0.068467 | 0.206638 | 0.004625 | -0.016321 | -0.031626 | -0.013747 | -0.043160 | -0.013812 | 0.034851 | 0.122309 | -0.001577 | -0.026264 | 1.000000 | -0.171225 | 0.108909 | 0.119437 | 0.078656 | 0.009817 | -0.001536 | -0.029530 | 0.055374 | 0.015230 |
| %PTSFT | -0.026550 | -0.016905 | 0.086858 | 0.051201 | 0.050566 | -0.290526 | -0.110369 | -0.084307 | -0.246186 | 0.285242 | 0.276308 | 0.041051 | 0.056030 | 0.043063 | 0.049625 | 0.052775 | 0.117911 | 0.007439 | 0.054157 | -0.032886 | 0.073973 | 0.109683 | 0.054935 | 0.037694 | 0.005583 | 0.026700 | -0.011774 | 0.017924 | -0.176749 | -0.118304 | 0.059860 | 0.057776 | 0.048651 | 0.135623 | -0.433099 | -0.290967 | 0.176264 | -0.000420 | 0.030655 | 0.005316 | -0.083367 | -0.090891 | -0.054846 | 0.066598 | -0.192410 | -0.148595 | -0.051532 | -0.376576 | -0.171225 | 1.000000 | -0.129222 | -0.126616 | -0.259349 | -0.096680 | -0.328641 | 0.076533 | -0.414645 | -0.003624 |
| %PTSOFFTO | -0.116107 | 0.018578 | -0.016638 | -0.015458 | -0.011706 | 0.044939 | -0.002219 | 0.000845 | 0.039068 | -0.020497 | -0.019945 | 0.229101 | -0.027428 | -0.057218 | -0.053086 | -0.021240 | -0.044408 | 0.139936 | -0.034144 | 0.005814 | -0.014394 | -0.077705 | -0.021254 | -0.003015 | 0.022224 | 0.001298 | 0.016400 | -0.023052 | 0.095988 | 0.022575 | -0.023574 | -0.075679 | -0.066231 | -0.058221 | 0.040327 | 0.047253 | -0.105585 | 0.012979 | -0.023429 | 0.002416 | 0.065297 | 0.044638 | 0.071585 | 0.058467 | -0.022665 | 0.112691 | 0.043756 | -0.038917 | 0.108909 | -0.129222 | 1.000000 | 0.094287 | 0.127642 | 0.086077 | 0.101173 | 0.007002 | 0.019339 | 0.106465 |
| %PTSPITP | -0.137336 | -0.047885 | -0.039999 | 0.027996 | -0.090487 | 0.590949 | -0.491153 | -0.484758 | -0.408444 | 0.011176 | 0.075335 | -0.151972 | 0.522477 | 0.233679 | 0.343391 | -0.080517 | 0.047135 | -0.060910 | 0.358421 | 0.198407 | 0.064932 | 0.220153 | 0.056841 | -0.006536 | 0.017162 | -0.022421 | 0.026968 | -0.098144 | -0.199121 | -0.117218 | 0.635393 | 0.415706 | 0.580501 | 0.242068 | 0.284937 | 0.229186 | -0.044664 | 0.064373 | 0.135928 | -0.019952 | 0.047647 | 0.008259 | 0.078449 | 0.843660 | -0.814764 | 0.938638 | -0.252933 | -0.813998 | 0.119437 | -0.126616 | 0.094287 | 1.000000 | 0.247104 | 0.033092 | -0.261742 | -0.215453 | -0.122006 | 0.251159 |
| 2FGM%AST | 0.015655 | -0.115571 | -0.197412 | -0.174075 | -0.241023 | 0.374713 | -0.174183 | -0.201537 | -0.020505 | -0.226396 | -0.202141 | 0.100670 | 0.247337 | 0.076586 | 0.137180 | -0.425670 | -0.302428 | -0.260259 | 0.223246 | 0.097830 | -0.172276 | -0.036590 | -0.079160 | -0.031007 | 0.008063 | 0.031791 | -0.013288 | -0.531034 | -0.206284 | -0.375948 | 0.323206 | 0.243443 | 0.331210 | -0.081048 | 0.392743 | 0.337876 | -0.336992 | 0.049072 | -0.018838 | -0.065315 | 0.093275 | 0.090901 | 0.073001 | 0.093234 | -0.052922 | 0.192745 | -0.172408 | -0.046794 | 0.078656 | -0.259349 | 0.127642 | 0.247104 | 1.000000 | -0.697542 | 0.236194 | -0.486182 | 0.773596 | -0.667663 |
| 2FGM%UAST | 0.039718 | 0.305266 | 0.367293 | 0.355391 | 0.409555 | -0.019165 | 0.295590 | 0.318662 | 0.107288 | 0.339564 | 0.323565 | 0.195130 | -0.134427 | 0.074334 | 0.013162 | 0.523654 | 0.427283 | 0.324435 | -0.124426 | 0.103337 | 0.340305 | 0.101178 | 0.101356 | 0.072220 | 0.118516 | 0.054166 | 0.059837 | 0.566061 | 0.278951 | 0.323669 | -0.269522 | -0.174812 | -0.242546 | 0.037615 | -0.048286 | -0.010654 | 0.398588 | -0.086627 | 0.179143 | 0.249614 | 0.152519 | 0.121253 | 0.148891 | 0.133680 | -0.100613 | 0.139458 | 0.295634 | -0.080777 | 0.009817 | -0.096680 | 0.086077 | 0.033092 | -0.697542 | 1.000000 | -0.132585 | 0.551627 | -0.743498 | 0.898825 |
| 3FGM%AST | 0.020868 | 0.026122 | -0.058219 | -0.064089 | -0.029832 | -0.121650 | 0.113869 | 0.119421 | 0.584347 | -0.120076 | -0.155252 | 0.256770 | -0.181373 | -0.061567 | -0.102779 | -0.098820 | -0.125464 | -0.014121 | -0.086044 | 0.047883 | -0.084370 | -0.148219 | -0.039992 | -0.021278 | 0.027437 | 0.045775 | -0.006123 | -0.081342 | 0.129007 | 0.041878 | -0.258817 | -0.122456 | -0.195807 | -0.208840 | 0.045743 | 0.059558 | -0.136873 | -0.067789 | -0.067677 | 0.026623 | 0.119015 | 0.097203 | 0.113396 | -0.342752 | 0.392324 | -0.268679 | 0.001683 | 0.421201 | -0.001536 | -0.328641 | 0.101173 | -0.261742 | 0.236194 | -0.132585 | 1.000000 | -0.288741 | 0.452273 | -0.309824 |
| 3FGM%UAST | 0.013203 | 0.356889 | 0.506853 | 0.472211 | 0.527010 | -0.095382 | 0.463847 | 0.476755 | 0.241393 | 0.493395 | 0.469244 | 0.175276 | -0.179885 | 0.115015 | 0.029368 | 0.576870 | 0.522398 | 0.346934 | -0.107780 | 0.071721 | 0.436525 | 0.165092 | 0.156454 | 0.136110 | 0.120799 | 0.082255 | 0.044441 | 0.564587 | 0.241027 | 0.244618 | -0.325879 | -0.203483 | -0.287006 | 0.000252 | -0.044819 | 0.012918 | 0.528714 | -0.118438 | 0.266234 | 0.287573 | 0.137737 | 0.148845 | 0.092042 | -0.061382 | 0.073826 | -0.145979 | 0.211830 | 0.097277 | -0.029530 | 0.076533 | 0.007002 | -0.215453 | -0.486182 | 0.551627 | -0.288741 | 1.000000 | -0.562287 | 0.631530 |
| FGM%AST | 0.027992 | -0.181382 | -0.315100 | -0.311867 | -0.337194 | 0.138259 | -0.079265 | -0.109815 | 0.151911 | -0.366770 | -0.361292 | 0.062292 | 0.048466 | -0.069850 | -0.036760 | -0.467892 | -0.425212 | -0.266962 | 0.068285 | -0.023693 | -0.300949 | -0.164574 | -0.127723 | -0.055969 | -0.063968 | -0.014693 | -0.041133 | -0.515915 | -0.118755 | -0.234344 | 0.119056 | 0.076435 | 0.120907 | -0.196610 | 0.303271 | 0.239762 | -0.453046 | 0.030164 | -0.148897 | -0.144785 | 0.005317 | 0.017888 | -0.009538 | -0.282070 | 0.340071 | -0.209775 | -0.236185 | 0.410379 | 0.055374 | -0.414645 | 0.019339 | -0.122006 | 0.773596 | -0.743498 | 0.452273 | -0.562287 | 1.000000 | -0.853034 |
| FGM%UAST | -0.012590 | 0.289378 | 0.406371 | 0.408323 | 0.426410 | 0.105023 | 0.150268 | 0.175289 | -0.023200 | 0.426714 | 0.426745 | 0.088549 | -0.006011 | 0.144197 | 0.106847 | 0.546260 | 0.500964 | 0.325863 | -0.038397 | 0.125993 | 0.394651 | 0.199610 | 0.142270 | 0.065693 | 0.097334 | 0.029996 | 0.057901 | 0.597266 | 0.220256 | 0.301080 | -0.109940 | -0.070089 | -0.098675 | 0.143277 | -0.021534 | 0.019660 | 0.472688 | -0.048140 | 0.259167 | 0.230703 | 0.105739 | 0.075885 | 0.112041 | 0.380810 | -0.353513 | 0.364627 | 0.297817 | -0.339708 | 0.015230 | -0.003624 | 0.106465 | 0.251159 | -0.667663 | 0.898825 | -0.309824 | 0.631530 | -0.853034 | 1.000000 |
# Keep only the strong positive correlations. The threshold is 0.75, and the
# strict upper bound of 1 removes each column's correlation with itself.
threshold = 0.75
positive_correlations = correlations[(correlations > threshold) & (correlations < 1)]

# Print every pair of columns whose correlation meets the threshold. Entries
# that failed the mask are NaN, so dropna() leaves only the qualifying partners.
for column in positive_correlations:
    above_threshold = positive_correlations[column].dropna()
    # Series.iteritems() was removed in pandas 2.0; items() yields the same
    # (label, value) pairs and works on older versions too.
    for col, corr in above_threshold.items():
        print(f"Correlation: {corr:.3f}, Columns: {column} and {col}")
Correlation: 0.874, Columns: Min and PTS Correlation: 0.879, Columns: Min and FGM Correlation: 0.884, Columns: Min and FGA Correlation: 0.750, Columns: Min and DREB Correlation: 0.787, Columns: Min and TOV Correlation: 0.753, Columns: Min and PF Correlation: 0.918, Columns: Min and FP Correlation: 0.877, Columns: Min and POSS Correlation: 0.874, Columns: PTS and Min Correlation: 0.992, Columns: PTS and FGM Correlation: 0.982, Columns: PTS and FGA Correlation: 0.900, Columns: PTS and FTM Correlation: 0.890, Columns: PTS and FTA Correlation: 0.852, Columns: PTS and TOV Correlation: 0.953, Columns: PTS and FP Correlation: 0.768, Columns: PTS and POSS Correlation: 0.879, Columns: FGM and Min Correlation: 0.992, Columns: FGM and PTS Correlation: 0.977, Columns: FGM and FGA Correlation: 0.862, Columns: FGM and FTM Correlation: 0.862, Columns: FGM and FTA Correlation: 0.850, Columns: FGM and TOV Correlation: 0.959, Columns: FGM and FP Correlation: 0.770, Columns: FGM and POSS Correlation: 0.884, Columns: FGA and Min Correlation: 0.982, Columns: FGA and PTS Correlation: 0.977, Columns: FGA and FGM Correlation: 0.775, Columns: FGA and 3PA Correlation: 0.842, Columns: FGA and FTM Correlation: 0.829, Columns: FGA and FTA Correlation: 0.849, Columns: FGA and TOV Correlation: 0.931, Columns: FGA and FP Correlation: 0.772, Columns: FGA and POSS Correlation: 0.917, Columns: FG% and EFG% Correlation: 0.874, Columns: FG% and TS% Correlation: 0.982, Columns: 3PM and 3PA Correlation: 0.775, Columns: 3PA and FGA Correlation: 0.982, Columns: 3PA and 3PM Correlation: 0.900, Columns: FTM and PTS Correlation: 0.862, Columns: FTM and FGM Correlation: 0.842, Columns: FTM and FGA Correlation: 0.986, Columns: FTM and FTA Correlation: 0.787, Columns: FTM and TOV Correlation: 0.851, Columns: FTM and FP Correlation: 0.890, Columns: FTA and PTS Correlation: 0.862, Columns: FTA and FGM Correlation: 0.829, Columns: FTA and FGA Correlation: 0.986, Columns: FTA and FTM Correlation: 0.790, Columns: 
FTA and TOV Correlation: 0.857, Columns: FTA and FP Correlation: 0.835, Columns: OREB and REB Correlation: 0.750, Columns: DREB and Min Correlation: 0.973, Columns: DREB and REB Correlation: 0.837, Columns: DREB and FP Correlation: 0.784, Columns: DREB and DD2 Correlation: 0.835, Columns: REB and OREB Correlation: 0.973, Columns: REB and DREB Correlation: 0.780, Columns: REB and FP Correlation: 0.781, Columns: REB and DD2 Correlation: 0.832, Columns: AST and TOV Correlation: 0.789, Columns: AST and FP Correlation: 0.829, Columns: AST and AST% Correlation: 0.787, Columns: TOV and Min Correlation: 0.852, Columns: TOV and PTS Correlation: 0.850, Columns: TOV and FGM Correlation: 0.849, Columns: TOV and FGA Correlation: 0.787, Columns: TOV and FTM Correlation: 0.790, Columns: TOV and FTA Correlation: 0.832, Columns: TOV and AST Correlation: 0.869, Columns: TOV and FP Correlation: 0.753, Columns: PF and Min Correlation: 0.918, Columns: FP and Min Correlation: 0.953, Columns: FP and PTS Correlation: 0.959, Columns: FP and FGM Correlation: 0.931, Columns: FP and FGA Correlation: 0.851, Columns: FP and FTM Correlation: 0.857, Columns: FP and FTA Correlation: 0.837, Columns: FP and DREB Correlation: 0.780, Columns: FP and REB Correlation: 0.789, Columns: FP and AST Correlation: 0.869, Columns: FP and TOV Correlation: 0.796, Columns: FP and POSS Correlation: 0.784, Columns: DD2 and DREB Correlation: 0.781, Columns: DD2 and REB Correlation: 0.796, Columns: OFFRTG and NETRTG Correlation: 0.796, Columns: NETRTG and OFFRTG Correlation: 0.829, Columns: AST% and AST Correlation: 0.762, Columns: AST% and AST RATIO Correlation: 0.762, Columns: AST RATIO and AST% Correlation: 0.806, Columns: OREB% and REB% Correlation: 0.919, Columns: DREB% and REB% Correlation: 0.806, Columns: REB% and OREB% Correlation: 0.919, Columns: REB% and DREB% Correlation: 0.917, Columns: EFG% and FG% Correlation: 0.954, Columns: EFG% and TS% Correlation: 0.874, Columns: TS% and FG% Correlation: 0.954, 
Columns: TS% and EFG% Correlation: 0.877, Columns: POSS and Min Correlation: 0.768, Columns: POSS and PTS Correlation: 0.770, Columns: POSS and FGM Correlation: 0.772, Columns: POSS and FGA Correlation: 0.796, Columns: POSS and FP Correlation: 0.881, Columns: POSS and GP Correlation: 0.799, Columns: POSS and W Correlation: 0.753, Columns: POSS and L Correlation: 0.881, Columns: GP and POSS Correlation: 0.891, Columns: GP and W Correlation: 0.872, Columns: GP and L Correlation: 0.799, Columns: W and POSS Correlation: 0.891, Columns: W and GP Correlation: 0.753, Columns: L and POSS Correlation: 0.872, Columns: L and GP Correlation: 0.906, Columns: %FGA2PT and %PTS2PT Correlation: 0.844, Columns: %FGA2PT and %PTSPITP Correlation: 0.916, Columns: %FGA3PT and %PTS3PT Correlation: 0.906, Columns: %PTS2PT and %FGA2PT Correlation: 0.939, Columns: %PTS2PT and %PTSPITP Correlation: 0.916, Columns: %PTS3PT and %FGA3PT Correlation: 0.844, Columns: %PTSPITP and %FGA2PT Correlation: 0.939, Columns: %PTSPITP and %PTS2PT Correlation: 0.774, Columns: 2FGM%AST and FGM%AST Correlation: 0.899, Columns: 2FGM%UAST and FGM%UAST Correlation: 0.774, Columns: FGM%AST and 2FGM%AST Correlation: 0.899, Columns: FGM%UAST and 2FGM%UAST
# Same report for strongly negative correlations (below -0.75). No extra bound
# is needed here because a column's correlation with itself is never negative.
threshold = -0.75
negative_correlations = correlations[correlations < threshold]

# Entries that failed the mask are NaN, so dropna() leaves only the qualifying
# partners of each column.
for column in negative_correlations:
    above_threshold = negative_correlations[column].dropna()
    # Series.iteritems() was removed in pandas 2.0; items() yields the same
    # (label, value) pairs and works on older versions too.
    for col, corr in above_threshold.items():
        print(f"Correlation: {corr:.3f}, Columns: {column} and {col}")
Correlation: -0.962, Columns: %FGA2PT and %FGA3PT Correlation: -0.883, Columns: %FGA2PT and %PTS3PT Correlation: -0.962, Columns: %FGA3PT and %FGA2PT Correlation: -0.872, Columns: %FGA3PT and %PTS2PT Correlation: -0.815, Columns: %FGA3PT and %PTSPITP Correlation: -0.872, Columns: %PTS2PT and %FGA3PT Correlation: -0.860, Columns: %PTS2PT and %PTS3PT Correlation: -0.883, Columns: %PTS3PT and %FGA2PT Correlation: -0.860, Columns: %PTS3PT and %PTS2PT Correlation: -0.814, Columns: %PTS3PT and %PTSPITP Correlation: -0.815, Columns: %PTSPITP and %FGA3PT Correlation: -0.814, Columns: %PTSPITP and %PTS3PT Correlation: -0.853, Columns: FGM%AST and FGM%UAST Correlation: -0.853, Columns: FGM%UAST and FGM%AST
Many of the highly correlated pairs make sense, for example FTM and FTA, or REB and DREB. Knowing what we know about basketball, it makes sense that as the number of defensive rebounds goes up, so does the number of total rebounds. The negative correlations make sense as well. For example, if you take more 2 point shots, then your percentage of 3 point shots will go down. There are many examples of this. These heavily correlated features will need to be taken care of, or we will need to use an algorithm that is not sensitive to high correlations.
There are 60 columns in our dataframe, 58 of which are numeric. It's difficult to view all of those at the same time, so we will split them up into 3 sections so we can get a good look at histograms of all of them. I find it easier to look at the distribution using a KDE plot, so I have included those as well.
# Histograms for df.columns[2:22], laid out on a 5 x 4 grid of subplots.
num_rows, num_cols = 5, 4
fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 10))

# Flatten the grid row by row so that panel i is the subplot at
# row i // num_cols, column i % num_cols.
panels = axes.ravel()
for i, column in enumerate(df.columns[2:22]):
    ax = panels[i]
    ax.hist(df[column])
    ax.set(xlabel=column, ylabel='Frequency')

# Tighten the spacing between subplots, then render the figure.
fig.tight_layout()
plt.show()
# KDE plots for df.columns[2:22], laid out on a 5 x 4 grid of subplots.
num_rows, num_cols = 5, 4
fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 10))

# Flatten the grid row by row so that panel i is the subplot at
# row i // num_cols, column i % num_cols.
panels = axes.ravel()
for i, column in enumerate(df.columns[2:22]):
    ax = panels[i]
    sns.kdeplot(data=df[column], ax=ax)
    # Set the labels after plotting so they replace whatever seaborn chose.
    ax.set(xlabel=column, ylabel='Density')

# Tighten the spacing between subplots, then render the figure.
fig.tight_layout()
plt.show()
# Histograms for df.columns[22:42], laid out on a 5 x 4 grid of subplots.
num_rows, num_cols = 5, 4
fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 10))

# Flatten the grid row by row so that panel i is the subplot at
# row i // num_cols, column i % num_cols.
panels = axes.ravel()
for i, column in enumerate(df.columns[22:42]):
    ax = panels[i]
    ax.hist(df[column])
    ax.set(xlabel=column, ylabel='Frequency')

# Tighten the spacing between subplots, then render the figure.
fig.tight_layout()
plt.show()
# KDE plots for df.columns[22:42], laid out on a 5 x 4 grid of subplots.
num_rows, num_cols = 5, 4
fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 10))

# Flatten the grid row by row so that panel i is the subplot at
# row i // num_cols, column i % num_cols.
panels = axes.ravel()
for i, column in enumerate(df.columns[22:42]):
    ax = panels[i]
    sns.kdeplot(data=df[column], ax=ax)
    # Set the labels after plotting so they replace whatever seaborn chose.
    ax.set(xlabel=column, ylabel='Density')

# Tighten the spacing between subplots, then render the figure.
fig.tight_layout()
plt.show()
# Histograms for the remaining columns (df.columns[42:]) on a 5 x 4 grid.
num_rows, num_cols = 5, 4
fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 10))

# Flatten the grid row by row so that panel i is the subplot at
# row i // num_cols, column i % num_cols.
panels = axes.ravel()
for i, column in enumerate(df.columns[42:]):
    ax = panels[i]
    ax.hist(df[column])
    ax.set(xlabel=column, ylabel='Frequency')

# This last group may not fill the grid: remove every panel after the last
# one that was drawn (the slice is empty when the grid is full).
for spare in panels[i + 1:]:
    fig.delaxes(spare)

# Tighten the spacing between subplots, then render the figure.
fig.tight_layout()
plt.show()
# KDE plots for the remaining columns (df.columns[42:]) on a 5 x 4 grid.
num_rows, num_cols = 5, 4
fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 10))

# Flatten the grid row by row so that panel i is the subplot at
# row i // num_cols, column i % num_cols.
panels = axes.ravel()
for i, column in enumerate(df.columns[42:]):
    ax = panels[i]
    sns.kdeplot(data=df[column], ax=ax)
    # Set the labels after plotting so they replace whatever seaborn chose.
    ax.set(xlabel=column, ylabel='Density')

# This last group may not fill the grid: remove every panel after the last
# one that was drawn (the slice is empty when the grid is full).
for spare in panels[i + 1:]:
    fig.delaxes(spare)

# Tighten the spacing between subplots, then render the figure.
fig.tight_layout()
plt.show()
# If you want a closer look at each histogram, you can run this code. It will be commented out for now since there are many graphs
#for column in df.columns[2:]:
# plt.hist(df[column])
# plt.xlabel(column)
# plt.ylabel('Frequency')
#plt.show()
There are a number of things we can see from these graphs. Many of the offensive statistics are positively skewed. This makes sense given that there are rather few players that would have large amounts of points, rebounds, assists, etc. in a game. These players would be considered superstars, and there aren't that many in the league. There are also several variables that are not skewed or at least close to normal. These include DEFRTG, NETRTG, EFG%, TS%, PIE and PACE. There are other variables that are oddly distributed, such as W, POSS and Min.
The main point of my project is to cluster the different players in the NBA based on their statistics from this past year. I will first run the clustering on its own and look at the labels it assigns, and later I will use dimension reduction so that the clusters can be visualized more clearly.
# Preview the first rows of the full dataframe (still including Player and Team)
# before it is prepared for clustering.
df.head()
| Player | Team | Age | Min | PTS | FGM | FGA | FG% | 3PM | 3PA | 3P% | FTM | FTA | FT% | OREB | DREB | REB | AST | TOV | STL | BLK | PF | FP | DD2 | TD3 | +/- | OFFRTG | DEFRTG | NETRTG | AST% | AST/TO | AST RATIO | OREB% | DREB% | REB% | TO RATIO | EFG% | TS% | USG% | PACE | PIE | POSS | GP | W | L | %FGA2PT | %FGA3PT | %PTS2PT | %PTS2PT MR | %PTS3PT | %PTSFBPs | %PTSFT | %PTSOFFTO | %PTSPITP | 2FGM%AST | 2FGM%UAST | 3FGM%AST | 3FGM%UAST | FGM%AST | FGM%UAST | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Joel Embiid | PHI | 29 | 34.6 | 33.1 | 11.0 | 20.1 | 54.8 | 1.0 | 3.0 | 33.0 | 10.0 | 11.7 | 85.7 | 1.7 | 8.4 | 10.2 | 4.2 | 3.4 | 1.0 | 1.7 | 3.1 | 56.2 | 39.0 | 1.0 | 6.4 | 119.0 | 110.2 | 8.8 | 23.3 | 1.21 | 12.7 | 5.7 | 24.3 | 15.7 | 10.5 | 57.3 | 65.5 | 37.0 | 97.34 | 21.3 | 4639 | 66 | 43 | 23 | 84.9 | 15.1 | 60.7 | 15.7 | 9.1 | 6.3 | 30.2 | 12.7 | 45.0 | 60.0 | 40.0 | 89.4 | 10.6 | 62.6 | 37.4 |
| 1 | Luka Doncic | DAL | 24 | 36.2 | 32.4 | 10.9 | 22.0 | 49.6 | 2.8 | 8.2 | 34.2 | 7.8 | 10.5 | 74.2 | 0.8 | 7.8 | 8.6 | 8.0 | 3.6 | 1.4 | 0.5 | 2.5 | 56.8 | 36.0 | 10.0 | 1.9 | 118.1 | 116.0 | 2.1 | 40.8 | 2.24 | 21.1 | 2.4 | 22.4 | 12.4 | 9.4 | 56.0 | 60.9 | 36.8 | 97.63 | 20.2 | 4874 | 66 | 33 | 33 | 62.7 | 37.3 | 50.0 | 8.5 | 26.0 | 5.2 | 24.0 | 14.0 | 41.4 | 13.1 | 86.9 | 21.6 | 78.4 | 15.3 | 84.7 |
| 2 | Damian Lillard | POR | 32 | 36.3 | 32.2 | 9.6 | 20.7 | 46.3 | 4.2 | 11.3 | 37.1 | 8.8 | 9.6 | 91.4 | 0.8 | 4.0 | 4.8 | 7.3 | 3.3 | 0.9 | 0.3 | 1.9 | 49.1 | 16.0 | 2.0 | 1.8 | 119.5 | 117.4 | 2.1 | 33.8 | 2.23 | 20.8 | 2.2 | 11.2 | 6.8 | 9.3 | 56.4 | 64.5 | 33.1 | 99.78 | 17.3 | 4385 | 58 | 27 | 31 | 45.3 | 54.7 | 33.4 | 6.0 | 39.2 | 8.6 | 27.4 | 11.9 | 27.4 | 15.7 | 84.3 | 51.6 | 48.4 | 31.5 | 68.5 |
| 3 | Shai Gilgeous-Alexander | OKC | 24 | 35.5 | 31.4 | 10.4 | 20.3 | 51.0 | 0.9 | 2.5 | 34.5 | 9.8 | 10.9 | 90.5 | 0.9 | 4.0 | 4.8 | 5.5 | 2.8 | 1.6 | 1.0 | 2.8 | 50.4 | 3.0 | 0.0 | 2.2 | 116.1 | 113.3 | 2.7 | 24.9 | 1.93 | 16.4 | 2.3 | 10.9 | 6.5 | 8.5 | 53.1 | 62.6 | 31.8 | 103.47 | 17.5 | 5211 | 68 | 33 | 35 | 87.8 | 12.2 | 60.5 | 9.7 | 8.1 | 12.7 | 31.4 | 17.7 | 50.8 | 20.1 | 79.9 | 31.0 | 69.0 | 21.0 | 79.0 |
| 4 | Giannis Antetokounmpo | MIL | 28 | 32.1 | 31.1 | 11.2 | 20.3 | 55.3 | 0.7 | 2.7 | 27.5 | 7.9 | 12.3 | 64.5 | 2.2 | 9.6 | 11.8 | 5.7 | 3.9 | 0.8 | 0.8 | 3.1 | 54.8 | 46.0 | 6.0 | 5.4 | 116.4 | 109.2 | 7.2 | 31.4 | 1.46 | 16.2 | 6.5 | 26.8 | 17.1 | 11.1 | 57.2 | 60.5 | 37.3 | 103.63 | 20.4 | 4380 | 63 | 47 | 16 | 86.6 | 13.4 | 67.4 | 7.0 | 7.2 | 17.5 | 25.4 | 12.9 | 60.3 | 41.5 | 58.5 | 61.7 | 38.3 | 42.9 | 57.1 |
# Clustering needs purely numeric input, so leave out the two text columns
# (Player and Team) and preview what remains.
df1 = df.drop(columns=['Player', 'Team'])
df1.head()
| Age | Min | PTS | FGM | FGA | FG% | 3PM | 3PA | 3P% | FTM | FTA | FT% | OREB | DREB | REB | AST | TOV | STL | BLK | PF | FP | DD2 | TD3 | +/- | OFFRTG | DEFRTG | NETRTG | AST% | AST/TO | AST RATIO | OREB% | DREB% | REB% | TO RATIO | EFG% | TS% | USG% | PACE | PIE | POSS | GP | W | L | %FGA2PT | %FGA3PT | %PTS2PT | %PTS2PT MR | %PTS3PT | %PTSFBPs | %PTSFT | %PTSOFFTO | %PTSPITP | 2FGM%AST | 2FGM%UAST | 3FGM%AST | 3FGM%UAST | FGM%AST | FGM%UAST | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 29 | 34.6 | 33.1 | 11.0 | 20.1 | 54.8 | 1.0 | 3.0 | 33.0 | 10.0 | 11.7 | 85.7 | 1.7 | 8.4 | 10.2 | 4.2 | 3.4 | 1.0 | 1.7 | 3.1 | 56.2 | 39.0 | 1.0 | 6.4 | 119.0 | 110.2 | 8.8 | 23.3 | 1.21 | 12.7 | 5.7 | 24.3 | 15.7 | 10.5 | 57.3 | 65.5 | 37.0 | 97.34 | 21.3 | 4639 | 66 | 43 | 23 | 84.9 | 15.1 | 60.7 | 15.7 | 9.1 | 6.3 | 30.2 | 12.7 | 45.0 | 60.0 | 40.0 | 89.4 | 10.6 | 62.6 | 37.4 |
| 1 | 24 | 36.2 | 32.4 | 10.9 | 22.0 | 49.6 | 2.8 | 8.2 | 34.2 | 7.8 | 10.5 | 74.2 | 0.8 | 7.8 | 8.6 | 8.0 | 3.6 | 1.4 | 0.5 | 2.5 | 56.8 | 36.0 | 10.0 | 1.9 | 118.1 | 116.0 | 2.1 | 40.8 | 2.24 | 21.1 | 2.4 | 22.4 | 12.4 | 9.4 | 56.0 | 60.9 | 36.8 | 97.63 | 20.2 | 4874 | 66 | 33 | 33 | 62.7 | 37.3 | 50.0 | 8.5 | 26.0 | 5.2 | 24.0 | 14.0 | 41.4 | 13.1 | 86.9 | 21.6 | 78.4 | 15.3 | 84.7 |
| 2 | 32 | 36.3 | 32.2 | 9.6 | 20.7 | 46.3 | 4.2 | 11.3 | 37.1 | 8.8 | 9.6 | 91.4 | 0.8 | 4.0 | 4.8 | 7.3 | 3.3 | 0.9 | 0.3 | 1.9 | 49.1 | 16.0 | 2.0 | 1.8 | 119.5 | 117.4 | 2.1 | 33.8 | 2.23 | 20.8 | 2.2 | 11.2 | 6.8 | 9.3 | 56.4 | 64.5 | 33.1 | 99.78 | 17.3 | 4385 | 58 | 27 | 31 | 45.3 | 54.7 | 33.4 | 6.0 | 39.2 | 8.6 | 27.4 | 11.9 | 27.4 | 15.7 | 84.3 | 51.6 | 48.4 | 31.5 | 68.5 |
| 3 | 24 | 35.5 | 31.4 | 10.4 | 20.3 | 51.0 | 0.9 | 2.5 | 34.5 | 9.8 | 10.9 | 90.5 | 0.9 | 4.0 | 4.8 | 5.5 | 2.8 | 1.6 | 1.0 | 2.8 | 50.4 | 3.0 | 0.0 | 2.2 | 116.1 | 113.3 | 2.7 | 24.9 | 1.93 | 16.4 | 2.3 | 10.9 | 6.5 | 8.5 | 53.1 | 62.6 | 31.8 | 103.47 | 17.5 | 5211 | 68 | 33 | 35 | 87.8 | 12.2 | 60.5 | 9.7 | 8.1 | 12.7 | 31.4 | 17.7 | 50.8 | 20.1 | 79.9 | 31.0 | 69.0 | 21.0 | 79.0 |
| 4 | 28 | 32.1 | 31.1 | 11.2 | 20.3 | 55.3 | 0.7 | 2.7 | 27.5 | 7.9 | 12.3 | 64.5 | 2.2 | 9.6 | 11.8 | 5.7 | 3.9 | 0.8 | 0.8 | 3.1 | 54.8 | 46.0 | 6.0 | 5.4 | 116.4 | 109.2 | 7.2 | 31.4 | 1.46 | 16.2 | 6.5 | 26.8 | 17.1 | 11.1 | 57.2 | 60.5 | 37.3 | 103.63 | 20.4 | 4380 | 63 | 47 | 16 | 86.6 | 13.4 | 67.4 | 7.0 | 7.2 | 17.5 | 25.4 | 12.9 | 60.3 | 41.5 | 58.5 | 61.7 | 38.3 | 42.9 | 57.1 |
# Clustering is sensitive to scale, so bring every feature to zero mean and
# unit variance with StandardScaler before fitting.
scaler = StandardScaler()
X = scaler.fit(df1).transform(df1)
The common way of determining the correct number of clusters for k-means clustering is the elbow method. However, the elbow method may not always be the most accurate, particularly when the number of clusters is high. The Calinski-Harabasz score, the Davies-Bouldin score and the Silhouette score can all give a more accurate number of clusters. I will run all four methods here and compare the results. Check out the Sklearn notes for each of these and other methods under clustering metrics: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics
In addition, check out this blog post: https://towardsdatascience.com/are-you-still-using-the-elbow-method-5d271b3063bd This was one of the main reasons I chose the metrics I used. Because we don't have the true labels, many of the other clustering metrics don't work. That's why I chose the three scores given above
# Fit k-means for every candidate cluster count on the standardized data (X)
# and record four model-selection criteria for each fit.
k_values = range(2, 25)
wcss = []       # inertia (within-cluster sum of squares), used by the elbow method
ch_scores = []  # Calinski-Harabasz score
db_scores = []  # Davies-Bouldin score
s_scores = []   # silhouette score
for i in k_values:
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=9)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)
    ch_scores.append(calinski_harabasz_score(X, kmeans.labels_))
    db_scores.append(davies_bouldin_score(X, kmeans.labels_))
    s_scores.append(silhouette_score(X, kmeans.labels_))

# One panel per criterion; each title notes whether the lowest or the highest
# value is the preferred one.
fig, ax = plt.subplots(2, 2, figsize=(12, 12))
ax[0, 0].plot(k_values, wcss)
ax[0, 0].set_title('K Means with Elbow Method (Lowest)')
ax[0, 0].set_xticks(k_values)
ax[0, 1].plot(k_values, ch_scores)
ax[0, 1].set_title('K Means with Calinski-Harabasz Method (Highest)')
ax[0, 1].set_xticks(k_values)
ax[1, 0].plot(k_values, s_scores)
ax[1, 0].set_title('K Means with Silhouette Method (Highest)')
ax[1, 0].set_xticks(k_values)
ax[1, 1].plot(k_values, db_scores)
ax[1, 1].set_title('K Means with Davies-Bouldin Method (Lowest)')
ax[1, 1].set_xticks(k_values)
# plt.show() instead of fig.show(): under the inline (non-GUI) backend
# fig.show() cannot display the figure and only emits a UserWarning.
plt.show()
C:\Users\tfurr\AppData\Local\Temp\ipykernel_4064\2825464523.py:16: UserWarning: Matplotlib is currently using module://matplotlib_inline.backend_inline, which is a non-GUI backend, so cannot show the figure. fig.show()
The four methods don't come to a consensus on the correct number of clusters, though they don't necessarily need to. The thing that concerns me is that none of them are even close. The elbow method is smooth and curved, with no distinct "elbow". The same can be said for the Calinski-Harabasz method. The silhouette method recommends anything from 2 to 5 and maybe 10. The Davies-Bouldin method recommends 16 or 20. I suspect there are issues here that we can address, so I'm not even going to perform the clustering here. We already know there are variables that are highly correlated. So I will perform PCA and later on I will also filter the data.
# Fit a 25-component PCA on the standardized data, then chart the share of
# variance captured by each of the first 20 components.
pca1 = PCA(n_components=25)
X_pca1 = pca1.fit_transform(X)

component_numbers = np.arange(1, 21)
plt.bar(component_numbers, pca1.explained_variance_ratio_[:20])
plt.xlabel('Principal Component')
plt.ylabel('Explained Variance Ratio')
plt.xticks(component_numbers)
plt.show()

# Leave the full array as the cell's last expression so it is displayed.
pca1.explained_variance_ratio_
array([0.27536352, 0.15784169, 0.09534806, 0.05414205, 0.04786986,
0.04062998, 0.03685692, 0.02583204, 0.02390873, 0.02093006,
0.02021895, 0.01858952, 0.01674272, 0.01608989, 0.01456463,
0.01396763, 0.01317754, 0.01155616, 0.01024437, 0.00932619,
0.00839855, 0.0079888 , 0.00736582, 0.00689754, 0.00620988])
The first 3 components explain almost 53% of the variation in the data so I will just use those 3. You could make an argument for other numbers as well such as 2 or 4. I feel comfortable with the first 3.
# Keep only the first 3 principal components of the standardized data.
pca = PCA(n_components=3)
X_pca = pca.fit_transform(X)

# Same procedure as for the un-reduced data: fit k-means for every candidate
# cluster count on the PCA scores and record the four selection criteria.
k_values = range(2, 25)
wcss = []       # inertia (within-cluster sum of squares), used by the elbow method
ch_scores = []  # Calinski-Harabasz score
db_scores = []  # Davies-Bouldin score
s_scores = []   # silhouette score
for i in k_values:
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=9)
    kmeans.fit(X_pca)
    wcss.append(kmeans.inertia_)
    ch_scores.append(calinski_harabasz_score(X_pca, kmeans.labels_))
    db_scores.append(davies_bouldin_score(X_pca, kmeans.labels_))
    s_scores.append(silhouette_score(X_pca, kmeans.labels_))

# One panel per criterion; each title notes whether the lowest or the highest
# value is the preferred one.
fig, ax = plt.subplots(2, 2, figsize=(12, 12))
ax[0, 0].plot(k_values, wcss)
ax[0, 0].set_title('K Means with Elbow Method (Lowest)')
ax[0, 0].set_xticks(k_values)
ax[0, 1].plot(k_values, ch_scores)
ax[0, 1].set_title('K Means with Calinski-Harabasz Method (Highest)')
ax[0, 1].set_xticks(k_values)
ax[1, 0].plot(k_values, s_scores)
ax[1, 0].set_title('K Means with Silhouette Method (Highest)')
ax[1, 0].set_xticks(k_values)
ax[1, 1].plot(k_values, db_scores)
ax[1, 1].set_title('K Means with Davies-Bouldin Method (Lowest)')
ax[1, 1].set_xticks(k_values)
# plt.show() instead of fig.show(): under the inline (non-GUI) backend
# fig.show() cannot display the figure and only emits a UserWarning.
plt.show()
C:\Users\tfurr\AppData\Local\Temp\ipykernel_4064\2825464523.py:16: UserWarning: Matplotlib is currently using module://matplotlib_inline.backend_inline, which is a non-GUI backend, so cannot show the figure. fig.show()
Performing PCA has given a more concrete answer to the number of clusters. The elbow method might give about 6, though it's still pretty smooth. However, the other 3 methods all have definitive spikes in their respective directions at 6. In addition, the bottom two methods both have distinctive spikes at 17. I will look at the clustering with both 6 and 17 clusters.
# Fit the two candidate solutions (6 and 17 clusters) on the 3-component PCA
# scores. random_state matches the seed used in the model-selection loop so
# that the clustering inspected here is the one that was scored, and so that
# the cluster numbers stay the same every time the notebook is re-run.
kmeans = KMeans(n_clusters=6, random_state=9)
kmeans.fit(X_pca)
labels6 = kmeans.labels_
df['labels_6'] = labels6

kmeans2 = KMeans(n_clusters=17, random_state=9)
kmeans2.fit(X_pca)
labels17 = kmeans2.labels_
df['labels_17'] = labels17

# Make sure it looks correct
df.head()
| Player | Team | Age | Min | PTS | FGM | FGA | FG% | 3PM | 3PA | 3P% | FTM | FTA | FT% | OREB | DREB | REB | AST | TOV | STL | BLK | PF | FP | DD2 | TD3 | +/- | OFFRTG | DEFRTG | NETRTG | AST% | AST/TO | AST RATIO | OREB% | DREB% | REB% | TO RATIO | EFG% | TS% | USG% | PACE | PIE | POSS | GP | W | L | %FGA2PT | %FGA3PT | %PTS2PT | %PTS2PT MR | %PTS3PT | %PTSFBPs | %PTSFT | %PTSOFFTO | %PTSPITP | 2FGM%AST | 2FGM%UAST | 3FGM%AST | 3FGM%UAST | FGM%AST | FGM%UAST | labels_6 | labels_17 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Joel Embiid | PHI | 29 | 34.6 | 33.1 | 11.0 | 20.1 | 54.8 | 1.0 | 3.0 | 33.0 | 10.0 | 11.7 | 85.7 | 1.7 | 8.4 | 10.2 | 4.2 | 3.4 | 1.0 | 1.7 | 3.1 | 56.2 | 39.0 | 1.0 | 6.4 | 119.0 | 110.2 | 8.8 | 23.3 | 1.21 | 12.7 | 5.7 | 24.3 | 15.7 | 10.5 | 57.3 | 65.5 | 37.0 | 97.34 | 21.3 | 4639 | 66 | 43 | 23 | 84.9 | 15.1 | 60.7 | 15.7 | 9.1 | 6.3 | 30.2 | 12.7 | 45.0 | 60.0 | 40.0 | 89.4 | 10.6 | 62.6 | 37.4 | 4 | 13 |
| 1 | Luka Doncic | DAL | 24 | 36.2 | 32.4 | 10.9 | 22.0 | 49.6 | 2.8 | 8.2 | 34.2 | 7.8 | 10.5 | 74.2 | 0.8 | 7.8 | 8.6 | 8.0 | 3.6 | 1.4 | 0.5 | 2.5 | 56.8 | 36.0 | 10.0 | 1.9 | 118.1 | 116.0 | 2.1 | 40.8 | 2.24 | 21.1 | 2.4 | 22.4 | 12.4 | 9.4 | 56.0 | 60.9 | 36.8 | 97.63 | 20.2 | 4874 | 66 | 33 | 33 | 62.7 | 37.3 | 50.0 | 8.5 | 26.0 | 5.2 | 24.0 | 14.0 | 41.4 | 13.1 | 86.9 | 21.6 | 78.4 | 15.3 | 84.7 | 4 | 1 |
| 2 | Damian Lillard | POR | 32 | 36.3 | 32.2 | 9.6 | 20.7 | 46.3 | 4.2 | 11.3 | 37.1 | 8.8 | 9.6 | 91.4 | 0.8 | 4.0 | 4.8 | 7.3 | 3.3 | 0.9 | 0.3 | 1.9 | 49.1 | 16.0 | 2.0 | 1.8 | 119.5 | 117.4 | 2.1 | 33.8 | 2.23 | 20.8 | 2.2 | 11.2 | 6.8 | 9.3 | 56.4 | 64.5 | 33.1 | 99.78 | 17.3 | 4385 | 58 | 27 | 31 | 45.3 | 54.7 | 33.4 | 6.0 | 39.2 | 8.6 | 27.4 | 11.9 | 27.4 | 15.7 | 84.3 | 51.6 | 48.4 | 31.5 | 68.5 | 4 | 1 |
| 3 | Shai Gilgeous-Alexander | OKC | 24 | 35.5 | 31.4 | 10.4 | 20.3 | 51.0 | 0.9 | 2.5 | 34.5 | 9.8 | 10.9 | 90.5 | 0.9 | 4.0 | 4.8 | 5.5 | 2.8 | 1.6 | 1.0 | 2.8 | 50.4 | 3.0 | 0.0 | 2.2 | 116.1 | 113.3 | 2.7 | 24.9 | 1.93 | 16.4 | 2.3 | 10.9 | 6.5 | 8.5 | 53.1 | 62.6 | 31.8 | 103.47 | 17.5 | 5211 | 68 | 33 | 35 | 87.8 | 12.2 | 60.5 | 9.7 | 8.1 | 12.7 | 31.4 | 17.7 | 50.8 | 20.1 | 79.9 | 31.0 | 69.0 | 21.0 | 79.0 | 4 | 1 |
| 4 | Giannis Antetokounmpo | MIL | 28 | 32.1 | 31.1 | 11.2 | 20.3 | 55.3 | 0.7 | 2.7 | 27.5 | 7.9 | 12.3 | 64.5 | 2.2 | 9.6 | 11.8 | 5.7 | 3.9 | 0.8 | 0.8 | 3.1 | 54.8 | 46.0 | 6.0 | 5.4 | 116.4 | 109.2 | 7.2 | 31.4 | 1.46 | 16.2 | 6.5 | 26.8 | 17.1 | 11.1 | 57.2 | 60.5 | 37.3 | 103.63 | 20.4 | 4380 | 63 | 47 | 16 | 86.6 | 13.4 | 67.4 | 7.0 | 7.2 | 17.5 | 25.4 | 12.9 | 60.3 | 41.5 | 58.5 | 61.7 | 38.3 | 42.9 | 57.1 | 4 | 13 |
# Count how many players landed in each of the 6 clusters.
df.labels_6.value_counts()
2 141 0 124 1 84 3 71 5 61 4 58 Name: labels_6, dtype: int64
# Count how many players landed in each of the 17 clusters.
df.labels_17.value_counts()
0 50 7 49 4 49 6 40 16 38 5 37 12 37 9 35 11 35 3 35 1 33 14 30 2 21 15 18 10 15 13 11 8 6 Name: labels_17, dtype: int64
# List the names of the players assigned to cluster 1 of the 6-cluster fit.
in_cluster_1 = df['labels_6'] == 1
df.loc[in_cluster_1, 'Player']
160 Jeenathan Williams
199 Hamidou Diallo
208 Jae'Sean Tate
237 Aleksej Pokusevski
253 T.J. Warren
...
512 Thanasis Antetokounmpo
514 Braxton Key
516 Tyrese Martin
522 Noah Vonleh
523 Chima Moneke
Name: Player, Length: 84, dtype: object
# Plotting frame for the visualizations: player name, the three PCA scores,
# and the cluster assignments from the 6- and 17-cluster fits.
df_new = pd.DataFrame({
    'Player': df['Player'],
    'PC1': X_pca[:, 0],
    'PC2': X_pca[:, 1],
    'PC3': X_pca[:, 2],
    'Cluster6': df['labels_6'],
    'Cluster17': df['labels_17'],
})
# Store the labels as categoricals so they are not treated as plain numbers.
df_new = df_new.astype({'Cluster6': 'category', 'Cluster17': 'category'})
# 2-D view of the clusters on the first two principal components, once per
# clustering; both plots share the axes and the hover information.
pc_axes_2d = dict(x='PC1', y='PC2', hover_data=['Player'])

fig = px.scatter(
    df_new,
    color='Cluster6',
    title="2023 NBA Regular Season Stats Clustering with 6 clusters on 2 Components",
    **pc_axes_2d,
)
fig.show()

# Pass the "Alphabet" qualitative palette explicitly for the 17-cluster plot.
fig = px.scatter(
    df_new,
    color='Cluster17',
    title="2023 NBA Regular Season Stats Clustering with 17 clusters on 2 Components",
    color_discrete_sequence=px.colors.qualitative.Alphabet,
    **pc_axes_2d,
)
fig.show()
I did include all 3 components in the data, so we may as well look at the 3d version
# 3-D view of the same clusterings using all three retained components.
pc_axes_3d = dict(x='PC1', y='PC2', z='PC3', hover_data=['Player'])

fig = px.scatter_3d(
    df_new,
    color='Cluster6',
    title="2023 NBA Regular Season Stats Clustering with 6 clusters on 3 Components",
    **pc_axes_3d,
)
fig.show()

# Pass the "Alphabet" qualitative palette explicitly for the 17-cluster plot.
fig = px.scatter_3d(
    df_new,
    color='Cluster17',
    title="2023 NBA Regular Season Stats Clustering with 17 clusters on 3 Components",
    color_discrete_sequence=px.colors.qualitative.Alphabet,
    **pc_axes_3d,
)
fig.show()
All 4 of these graphs show interesting trends amongst the data. I am not attempting to label any of these clusters, though we could do that. I won't pretend I know enough about the intricate nature of basketball or all the players that played this year. However, I can tell that in the graphs with 6 clusters, cluster 2 would probably be considered high rebounding, traditional big men; cluster 5 would be "superstars"; clusters 0 and 3 would probably be more "role players"; and clusters 1 and 4 would likely be players that don't play as much. (K-means numbers its clusters arbitrarily, so these cluster numbers refer to the run I was looking at and may not match the labels shown in the tables above.) With the 17 clusters it's much more difficult for me to define. However, it's interesting to look at who is in each cluster.
One of the issues I can see is that there are a number of players who don't play much. I said before I would filter out these players, but I wanted to see what the clusters would look like with those players in the data set. I will now filter out those players.
# Preview the first five rows of the full table before filtering.
df.head(n=5)
| Player | Team | Age | Min | PTS | FGM | FGA | FG% | 3PM | 3PA | 3P% | FTM | FTA | FT% | OREB | DREB | REB | AST | TOV | STL | BLK | PF | FP | DD2 | TD3 | +/- | OFFRTG | DEFRTG | NETRTG | AST% | AST/TO | AST RATIO | OREB% | DREB% | REB% | TO RATIO | EFG% | TS% | USG% | PACE | PIE | POSS | GP | W | L | %FGA2PT | %FGA3PT | %PTS2PT | %PTS2PT MR | %PTS3PT | %PTSFBPs | %PTSFT | %PTSOFFTO | %PTSPITP | 2FGM%AST | 2FGM%UAST | 3FGM%AST | 3FGM%UAST | FGM%AST | FGM%UAST | labels_6 | labels_17 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Joel Embiid | PHI | 29 | 34.6 | 33.1 | 11.0 | 20.1 | 54.8 | 1.0 | 3.0 | 33.0 | 10.0 | 11.7 | 85.7 | 1.7 | 8.4 | 10.2 | 4.2 | 3.4 | 1.0 | 1.7 | 3.1 | 56.2 | 39.0 | 1.0 | 6.4 | 119.0 | 110.2 | 8.8 | 23.3 | 1.21 | 12.7 | 5.7 | 24.3 | 15.7 | 10.5 | 57.3 | 65.5 | 37.0 | 97.34 | 21.3 | 4639 | 66 | 43 | 23 | 84.9 | 15.1 | 60.7 | 15.7 | 9.1 | 6.3 | 30.2 | 12.7 | 45.0 | 60.0 | 40.0 | 89.4 | 10.6 | 62.6 | 37.4 | 4 | 13 |
| 1 | Luka Doncic | DAL | 24 | 36.2 | 32.4 | 10.9 | 22.0 | 49.6 | 2.8 | 8.2 | 34.2 | 7.8 | 10.5 | 74.2 | 0.8 | 7.8 | 8.6 | 8.0 | 3.6 | 1.4 | 0.5 | 2.5 | 56.8 | 36.0 | 10.0 | 1.9 | 118.1 | 116.0 | 2.1 | 40.8 | 2.24 | 21.1 | 2.4 | 22.4 | 12.4 | 9.4 | 56.0 | 60.9 | 36.8 | 97.63 | 20.2 | 4874 | 66 | 33 | 33 | 62.7 | 37.3 | 50.0 | 8.5 | 26.0 | 5.2 | 24.0 | 14.0 | 41.4 | 13.1 | 86.9 | 21.6 | 78.4 | 15.3 | 84.7 | 4 | 1 |
| 2 | Damian Lillard | POR | 32 | 36.3 | 32.2 | 9.6 | 20.7 | 46.3 | 4.2 | 11.3 | 37.1 | 8.8 | 9.6 | 91.4 | 0.8 | 4.0 | 4.8 | 7.3 | 3.3 | 0.9 | 0.3 | 1.9 | 49.1 | 16.0 | 2.0 | 1.8 | 119.5 | 117.4 | 2.1 | 33.8 | 2.23 | 20.8 | 2.2 | 11.2 | 6.8 | 9.3 | 56.4 | 64.5 | 33.1 | 99.78 | 17.3 | 4385 | 58 | 27 | 31 | 45.3 | 54.7 | 33.4 | 6.0 | 39.2 | 8.6 | 27.4 | 11.9 | 27.4 | 15.7 | 84.3 | 51.6 | 48.4 | 31.5 | 68.5 | 4 | 1 |
| 3 | Shai Gilgeous-Alexander | OKC | 24 | 35.5 | 31.4 | 10.4 | 20.3 | 51.0 | 0.9 | 2.5 | 34.5 | 9.8 | 10.9 | 90.5 | 0.9 | 4.0 | 4.8 | 5.5 | 2.8 | 1.6 | 1.0 | 2.8 | 50.4 | 3.0 | 0.0 | 2.2 | 116.1 | 113.3 | 2.7 | 24.9 | 1.93 | 16.4 | 2.3 | 10.9 | 6.5 | 8.5 | 53.1 | 62.6 | 31.8 | 103.47 | 17.5 | 5211 | 68 | 33 | 35 | 87.8 | 12.2 | 60.5 | 9.7 | 8.1 | 12.7 | 31.4 | 17.7 | 50.8 | 20.1 | 79.9 | 31.0 | 69.0 | 21.0 | 79.0 | 4 | 1 |
| 4 | Giannis Antetokounmpo | MIL | 28 | 32.1 | 31.1 | 11.2 | 20.3 | 55.3 | 0.7 | 2.7 | 27.5 | 7.9 | 12.3 | 64.5 | 2.2 | 9.6 | 11.8 | 5.7 | 3.9 | 0.8 | 0.8 | 3.1 | 54.8 | 46.0 | 6.0 | 5.4 | 116.4 | 109.2 | 7.2 | 31.4 | 1.46 | 16.2 | 6.5 | 26.8 | 17.1 | 11.1 | 57.2 | 60.5 | 37.3 | 103.63 | 20.4 | 4380 | 63 | 47 | 16 | 86.6 | 13.4 | 67.4 | 7.0 | 7.2 | 17.5 | 25.4 | 12.9 | 60.3 | 41.5 | 58.5 | 61.7 | 38.3 | 42.9 | 57.1 | 4 | 13 |
It is genuinely difficult to decide where the threshold should be for keeping players in the dataset. I landed on more than 21 games played and more than 8 minutes per game. In this sense, the player must have played over a quarter of the games of the NBA season and more than a sixth of the minutes per game (8 of 48). You could certainly make the case for other numbers as well. I felt this was a reasonable amount that would eliminate garbage time players and players that were injured for most of the season.
# Keep only players with more than 21 games played and more than 8 minutes
# per game. The copy makes filtered_df independent of df.
played_enough = df['GP'].gt(21) & df['Min'].gt(8)
filtered_df = df.loc[played_enough].copy()

# Numeric-only version for clustering: drop the text columns and the cluster
# labels produced by the earlier, unfiltered fits.
filtered_df1 = filtered_df.drop(columns=['Player', 'Team', 'labels_6', 'labels_17'])
filtered_df1.head()  # Take a look
| Age | Min | PTS | FGM | FGA | FG% | 3PM | 3PA | 3P% | FTM | FTA | FT% | OREB | DREB | REB | AST | TOV | STL | BLK | PF | FP | DD2 | TD3 | +/- | OFFRTG | DEFRTG | NETRTG | AST% | AST/TO | AST RATIO | OREB% | DREB% | REB% | TO RATIO | EFG% | TS% | USG% | PACE | PIE | POSS | GP | W | L | %FGA2PT | %FGA3PT | %PTS2PT | %PTS2PT MR | %PTS3PT | %PTSFBPs | %PTSFT | %PTSOFFTO | %PTSPITP | 2FGM%AST | 2FGM%UAST | 3FGM%AST | 3FGM%UAST | FGM%AST | FGM%UAST | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 29 | 34.6 | 33.1 | 11.0 | 20.1 | 54.8 | 1.0 | 3.0 | 33.0 | 10.0 | 11.7 | 85.7 | 1.7 | 8.4 | 10.2 | 4.2 | 3.4 | 1.0 | 1.7 | 3.1 | 56.2 | 39.0 | 1.0 | 6.4 | 119.0 | 110.2 | 8.8 | 23.3 | 1.21 | 12.7 | 5.7 | 24.3 | 15.7 | 10.5 | 57.3 | 65.5 | 37.0 | 97.34 | 21.3 | 4639 | 66 | 43 | 23 | 84.9 | 15.1 | 60.7 | 15.7 | 9.1 | 6.3 | 30.2 | 12.7 | 45.0 | 60.0 | 40.0 | 89.4 | 10.6 | 62.6 | 37.4 |
| 1 | 24 | 36.2 | 32.4 | 10.9 | 22.0 | 49.6 | 2.8 | 8.2 | 34.2 | 7.8 | 10.5 | 74.2 | 0.8 | 7.8 | 8.6 | 8.0 | 3.6 | 1.4 | 0.5 | 2.5 | 56.8 | 36.0 | 10.0 | 1.9 | 118.1 | 116.0 | 2.1 | 40.8 | 2.24 | 21.1 | 2.4 | 22.4 | 12.4 | 9.4 | 56.0 | 60.9 | 36.8 | 97.63 | 20.2 | 4874 | 66 | 33 | 33 | 62.7 | 37.3 | 50.0 | 8.5 | 26.0 | 5.2 | 24.0 | 14.0 | 41.4 | 13.1 | 86.9 | 21.6 | 78.4 | 15.3 | 84.7 |
| 2 | 32 | 36.3 | 32.2 | 9.6 | 20.7 | 46.3 | 4.2 | 11.3 | 37.1 | 8.8 | 9.6 | 91.4 | 0.8 | 4.0 | 4.8 | 7.3 | 3.3 | 0.9 | 0.3 | 1.9 | 49.1 | 16.0 | 2.0 | 1.8 | 119.5 | 117.4 | 2.1 | 33.8 | 2.23 | 20.8 | 2.2 | 11.2 | 6.8 | 9.3 | 56.4 | 64.5 | 33.1 | 99.78 | 17.3 | 4385 | 58 | 27 | 31 | 45.3 | 54.7 | 33.4 | 6.0 | 39.2 | 8.6 | 27.4 | 11.9 | 27.4 | 15.7 | 84.3 | 51.6 | 48.4 | 31.5 | 68.5 |
| 3 | 24 | 35.5 | 31.4 | 10.4 | 20.3 | 51.0 | 0.9 | 2.5 | 34.5 | 9.8 | 10.9 | 90.5 | 0.9 | 4.0 | 4.8 | 5.5 | 2.8 | 1.6 | 1.0 | 2.8 | 50.4 | 3.0 | 0.0 | 2.2 | 116.1 | 113.3 | 2.7 | 24.9 | 1.93 | 16.4 | 2.3 | 10.9 | 6.5 | 8.5 | 53.1 | 62.6 | 31.8 | 103.47 | 17.5 | 5211 | 68 | 33 | 35 | 87.8 | 12.2 | 60.5 | 9.7 | 8.1 | 12.7 | 31.4 | 17.7 | 50.8 | 20.1 | 79.9 | 31.0 | 69.0 | 21.0 | 79.0 |
| 4 | 28 | 32.1 | 31.1 | 11.2 | 20.3 | 55.3 | 0.7 | 2.7 | 27.5 | 7.9 | 12.3 | 64.5 | 2.2 | 9.6 | 11.8 | 5.7 | 3.9 | 0.8 | 0.8 | 3.1 | 54.8 | 46.0 | 6.0 | 5.4 | 116.4 | 109.2 | 7.2 | 31.4 | 1.46 | 16.2 | 6.5 | 26.8 | 17.1 | 11.1 | 57.2 | 60.5 | 37.3 | 103.63 | 20.4 | 4380 | 63 | 47 | 16 | 86.6 | 13.4 | 67.4 | 7.0 | 7.2 | 17.5 | 25.4 | 12.9 | 60.3 | 41.5 | 58.5 | 61.7 | 38.3 | 42.9 | 57.1 |
# Sanity check on the filtered, numeric-only frame: the summary should
# report 416 entries (players) and no missing values.
filtered_df1.info() # Make sure we have 416 players
<class 'pandas.core.frame.DataFrame'> Int64Index: 416 entries, 0 to 481 Data columns (total 58 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Age 416 non-null int64 1 Min 416 non-null float64 2 PTS 416 non-null float64 3 FGM 416 non-null float64 4 FGA 416 non-null float64 5 FG% 416 non-null float64 6 3PM 416 non-null float64 7 3PA 416 non-null float64 8 3P% 416 non-null float64 9 FTM 416 non-null float64 10 FTA 416 non-null float64 11 FT% 416 non-null float64 12 OREB 416 non-null float64 13 DREB 416 non-null float64 14 REB 416 non-null float64 15 AST 416 non-null float64 16 TOV 416 non-null float64 17 STL 416 non-null float64 18 BLK 416 non-null float64 19 PF 416 non-null float64 20 FP 416 non-null float64 21 DD2 416 non-null float64 22 TD3 416 non-null float64 23 +/- 416 non-null float64 24 OFFRTG 416 non-null float64 25 DEFRTG 416 non-null float64 26 NETRTG 416 non-null float64 27 AST% 416 non-null float64 28 AST/TO 416 non-null float64 29 AST RATIO 416 non-null float64 30 OREB% 416 non-null float64 31 DREB% 416 non-null float64 32 REB% 416 non-null float64 33 TO RATIO 416 non-null float64 34 EFG% 416 non-null float64 35 TS% 416 non-null float64 36 USG% 416 non-null float64 37 PACE 416 non-null float64 38 PIE 416 non-null float64 39 POSS 416 non-null int64 40 GP 416 non-null int64 41 W 416 non-null int64 42 L 416 non-null int64 43 %FGA2PT 416 non-null float64 44 %FGA3PT 416 non-null float64 45 %PTS2PT 416 non-null float64 46 %PTS2PT MR 416 non-null float64 47 %PTS3PT 416 non-null float64 48 %PTSFBPs 416 non-null float64 49 %PTSFT 416 non-null float64 50 %PTSOFFTO 416 non-null float64 51 %PTSPITP 416 non-null float64 52 2FGM%AST 416 non-null float64 53 2FGM%UAST 416 non-null float64 54 3FGM%AST 416 non-null float64 55 3FGM%UAST 416 non-null float64 56 FGM%AST 416 non-null float64 57 FGM%UAST 416 non-null float64 dtypes: float64(53), int64(5) memory usage: 191.8 KB
We will do the same scaling, as well as scoring and visualizations to determine how many clusters we should include
# Re-standardize using only the filtered players, so the scaling statistics
# come from the filtered data.
scaler = StandardScaler()
filtered_X = scaler.fit_transform(filtered_df1)

# Fit k-means for every candidate cluster count on the filtered data and
# record the four selection criteria.
k_values = range(2, 25)
wcss = []       # inertia (within-cluster sum of squares), used by the elbow method
ch_scores = []  # Calinski-Harabasz score
db_scores = []  # Davies-Bouldin score
s_scores = []   # silhouette score
for i in k_values:
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=9)
    kmeans.fit(filtered_X)
    wcss.append(kmeans.inertia_)
    ch_scores.append(calinski_harabasz_score(filtered_X, kmeans.labels_))
    db_scores.append(davies_bouldin_score(filtered_X, kmeans.labels_))
    s_scores.append(silhouette_score(filtered_X, kmeans.labels_))

# One panel per criterion; each title notes whether the lowest or the highest
# value is the preferred one.
fig, ax = plt.subplots(2, 2, figsize=(12, 12))
ax[0, 0].plot(k_values, wcss)
ax[0, 0].set_title('K Means with Elbow Method (Lowest)')
ax[0, 0].set_xticks(k_values)
ax[0, 1].plot(k_values, ch_scores)
ax[0, 1].set_title('K Means with Calinski-Harabasz Method (Highest)')
ax[0, 1].set_xticks(k_values)
ax[1, 0].plot(k_values, s_scores)
ax[1, 0].set_title('K Means with Silhouette Method (Highest)')
ax[1, 0].set_xticks(k_values)
ax[1, 1].plot(k_values, db_scores)
ax[1, 1].set_title('K Means with Davies-Bouldin Method (Lowest)')
ax[1, 1].set_xticks(k_values)
# plt.show() instead of fig.show(): under the inline (non-GUI) backend
# fig.show() cannot display the figure and only emits a UserWarning.
plt.show()
C:\Users\tfurr\AppData\Local\Temp\ipykernel_4064\2825464523.py:16: UserWarning: Matplotlib is currently using module://matplotlib_inline.backend_inline, which is a non-GUI backend, so cannot show the figure.
This is really encouraging. By filtering out the players that don't play much, the elbow method has a little more of an elbow at 3 (though it's still pretty smooth), and the other 3 methods all point to 3 clusters. While the Davies-Bouldin method suggests 15 or 18 as well, I will stick with 3.
# Fit the chosen 3-cluster solution on the filtered, standardized data.
# random_state matches the seed used in the model-selection loop so that the
# clustering inspected here is the one that was scored, and so that the
# cluster numbers stay the same every time the notebook is re-run.
kmeans = KMeans(n_clusters=3, random_state=9)
kmeans.fit(filtered_X)
labels3 = kmeans.labels_ # Save the labels
# Assign through .loc so pandas does not warn about setting on a copy of a slice
filtered_df.loc[:, 'labels_3'] = labels3
filtered_df['labels_3'] = filtered_df['labels_3'].astype('category') # make sure these are categorical
Let's look at some of the players in each of the clusters
# Names of the players assigned label 1 in the 3-cluster fit.
has_label_1 = filtered_df['labels_3'] == 1
filtered_df.loc[has_label_1, 'Player']
16 Anthony Davis
50 Bam Adebayo
64 Jaren Jackson Jr.
67 Deandre Ayton
68 Myles Turner
...
448 Christian Koloko
451 Richaun Holmes
453 Robin Lopez
459 Usman Garuba
469 Moussa Diabate
Name: Player, Length: 92, dtype: object
# Names of the players assigned label 0 in the 3-cluster fit.
has_label_0 = filtered_df['labels_3'] == 0
filtered_df.loc[has_label_0, 'Player']
71 Michael Porter Jr.
74 Andrew Wiggins
87 P.J. Washington
88 Cameron Johnson
89 De'Andre Hunter
...
468 Kessler Edwards
473 Ish Smith
476 Jeff Dowtin Jr.
478 Theo Pinson
481 Trent Forrest
Name: Player, Length: 216, dtype: object
# Names of the players assigned label 2 in the 3-cluster fit.
has_label_2 = filtered_df['labels_3'] == 2
filtered_df.loc[has_label_2, 'Player']
0 Joel Embiid
1 Luka Doncic
2 Damian Lillard
3 Shai Gilgeous-Alexander
4 Giannis Antetokounmpo
...
166 Cameron Payne
167 Monte Morris
185 Josh Hart
197 Kyle Anderson
224 T.J. McConnell
Name: Player, Length: 108, dtype: object
# Take a look at the first five rows, now including the labels_3 column.
filtered_df.head(n=5)
| Player | Team | Age | Min | PTS | FGM | FGA | FG% | 3PM | 3PA | 3P% | FTM | FTA | FT% | OREB | DREB | REB | AST | TOV | STL | BLK | PF | FP | DD2 | TD3 | +/- | OFFRTG | DEFRTG | NETRTG | AST% | AST/TO | AST RATIO | OREB% | DREB% | REB% | TO RATIO | EFG% | TS% | USG% | PACE | PIE | POSS | GP | W | L | %FGA2PT | %FGA3PT | %PTS2PT | %PTS2PT MR | %PTS3PT | %PTSFBPs | %PTSFT | %PTSOFFTO | %PTSPITP | 2FGM%AST | 2FGM%UAST | 3FGM%AST | 3FGM%UAST | FGM%AST | FGM%UAST | labels_6 | labels_17 | labels_3 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Joel Embiid | PHI | 29 | 34.6 | 33.1 | 11.0 | 20.1 | 54.8 | 1.0 | 3.0 | 33.0 | 10.0 | 11.7 | 85.7 | 1.7 | 8.4 | 10.2 | 4.2 | 3.4 | 1.0 | 1.7 | 3.1 | 56.2 | 39.0 | 1.0 | 6.4 | 119.0 | 110.2 | 8.8 | 23.3 | 1.21 | 12.7 | 5.7 | 24.3 | 15.7 | 10.5 | 57.3 | 65.5 | 37.0 | 97.34 | 21.3 | 4639 | 66 | 43 | 23 | 84.9 | 15.1 | 60.7 | 15.7 | 9.1 | 6.3 | 30.2 | 12.7 | 45.0 | 60.0 | 40.0 | 89.4 | 10.6 | 62.6 | 37.4 | 4 | 13 | 2 |
| 1 | Luka Doncic | DAL | 24 | 36.2 | 32.4 | 10.9 | 22.0 | 49.6 | 2.8 | 8.2 | 34.2 | 7.8 | 10.5 | 74.2 | 0.8 | 7.8 | 8.6 | 8.0 | 3.6 | 1.4 | 0.5 | 2.5 | 56.8 | 36.0 | 10.0 | 1.9 | 118.1 | 116.0 | 2.1 | 40.8 | 2.24 | 21.1 | 2.4 | 22.4 | 12.4 | 9.4 | 56.0 | 60.9 | 36.8 | 97.63 | 20.2 | 4874 | 66 | 33 | 33 | 62.7 | 37.3 | 50.0 | 8.5 | 26.0 | 5.2 | 24.0 | 14.0 | 41.4 | 13.1 | 86.9 | 21.6 | 78.4 | 15.3 | 84.7 | 4 | 1 | 2 |
| 2 | Damian Lillard | POR | 32 | 36.3 | 32.2 | 9.6 | 20.7 | 46.3 | 4.2 | 11.3 | 37.1 | 8.8 | 9.6 | 91.4 | 0.8 | 4.0 | 4.8 | 7.3 | 3.3 | 0.9 | 0.3 | 1.9 | 49.1 | 16.0 | 2.0 | 1.8 | 119.5 | 117.4 | 2.1 | 33.8 | 2.23 | 20.8 | 2.2 | 11.2 | 6.8 | 9.3 | 56.4 | 64.5 | 33.1 | 99.78 | 17.3 | 4385 | 58 | 27 | 31 | 45.3 | 54.7 | 33.4 | 6.0 | 39.2 | 8.6 | 27.4 | 11.9 | 27.4 | 15.7 | 84.3 | 51.6 | 48.4 | 31.5 | 68.5 | 4 | 1 | 2 |
| 3 | Shai Gilgeous-Alexander | OKC | 24 | 35.5 | 31.4 | 10.4 | 20.3 | 51.0 | 0.9 | 2.5 | 34.5 | 9.8 | 10.9 | 90.5 | 0.9 | 4.0 | 4.8 | 5.5 | 2.8 | 1.6 | 1.0 | 2.8 | 50.4 | 3.0 | 0.0 | 2.2 | 116.1 | 113.3 | 2.7 | 24.9 | 1.93 | 16.4 | 2.3 | 10.9 | 6.5 | 8.5 | 53.1 | 62.6 | 31.8 | 103.47 | 17.5 | 5211 | 68 | 33 | 35 | 87.8 | 12.2 | 60.5 | 9.7 | 8.1 | 12.7 | 31.4 | 17.7 | 50.8 | 20.1 | 79.9 | 31.0 | 69.0 | 21.0 | 79.0 | 4 | 1 | 2 |
| 4 | Giannis Antetokounmpo | MIL | 28 | 32.1 | 31.1 | 11.2 | 20.3 | 55.3 | 0.7 | 2.7 | 27.5 | 7.9 | 12.3 | 64.5 | 2.2 | 9.6 | 11.8 | 5.7 | 3.9 | 0.8 | 0.8 | 3.1 | 54.8 | 46.0 | 6.0 | 5.4 | 116.4 | 109.2 | 7.2 | 31.4 | 1.46 | 16.2 | 6.5 | 26.8 | 17.1 | 11.1 | 57.2 | 60.5 | 37.3 | 103.63 | 20.4 | 4380 | 63 | 47 | 16 | 86.6 | 13.4 | 67.4 | 7.0 | 7.2 | 17.5 | 25.4 | 12.9 | 60.3 | 41.5 | 58.5 | 61.7 | 38.3 | 42.9 | 57.1 | 4 | 13 | 2 |
When I don't have dimension reduction, it's a little more difficult to get a good visualization. There are so many columns to look at that with 2 or even 3 dimensions we are not likely to see definitive splits. That being said, it's interesting to see. We can change the variables as well to look at different combinations of variables.
# Without dimension reduction there is no natural pair of axes, so plot the
# clusters against a hand-picked choice of raw statistics. Change the column
# names below to look at other combinations.
raw_axes = dict(x='NETRTG', y='PIE', color='labels_3', hover_data=['Player'])

fig = px.scatter(
    filtered_df,
    title="2023 NBA Regular Filtered Season Stats Clustering on 2 Variables",
    **raw_axes,
)
fig.show()

# Same view with true shooting percentage added as a third axis.
fig = px.scatter_3d(
    filtered_df,
    z='TS%',
    title="2023 NBA Regular Filtered Season Stats Clustering on 3 Variables",
    **raw_axes,
)
fig.show()
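In this case, I honestly don't have any definitive labels for these clusters. The only thing I can see is that many of the players in one of the clusters are big men that generally have a lot of rebounds. I saw this group as cluster 2, but in the player lists printed above it is the group labeled 1; k-means numbers its clusters arbitrarily, so the label can change between runs. Other than that, I'm not entirely sure.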
Like before, I will look at PCA for this filtered data
# Fit a 25-component PCA on the filtered, standardized data, then chart the
# share of variance captured by each of the first 20 components.
filtered_pca1 = PCA(n_components=25)
filtered_X_pca1 = filtered_pca1.fit_transform(filtered_X)

component_numbers = np.arange(1, 21)
plt.bar(component_numbers, filtered_pca1.explained_variance_ratio_[:20])
plt.xlabel('Principal Component')
plt.ylabel('Explained Variance Ratio')
plt.xticks(component_numbers)
plt.show()

# Leave the full array as the cell's last expression so it is displayed.
filtered_pca1.explained_variance_ratio_
array([0.28123134, 0.20948999, 0.10193812, 0.05984639, 0.04173367,
0.03754766, 0.03023536, 0.02516983, 0.02184808, 0.01966989,
0.01890735, 0.01838053, 0.01550588, 0.01466067, 0.01327966,
0.00997892, 0.00980196, 0.00864983, 0.00741003, 0.00727333,
0.00648518, 0.00592371, 0.00530741, 0.00510965, 0.00427241])
The first 4 components explain about 65% of the variance in the filtered data, so I am going to keep those components. An argument could have been made to just keep the first 2, considering they explain about 49% of the variance. I felt 4 components was an appropriate amount.
This is the same as before, I run PCA, get the scores and plot them to find the correct number of clusters
# Keep only the first 4 principal components of the filtered, standardized data.
filtered_pca = PCA(n_components=4)
filtered_X_pca = filtered_pca.fit_transform(filtered_X)

# Fit k-means for every candidate cluster count on the PCA scores and record
# the four selection criteria.
k_values = range(2, 25)
wcss = []       # inertia (within-cluster sum of squares), used by the elbow method
ch_scores = []  # Calinski-Harabasz score
db_scores = []  # Davies-Bouldin score
s_scores = []   # silhouette score
for i in k_values:
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=9)
    kmeans.fit(filtered_X_pca)
    wcss.append(kmeans.inertia_)
    ch_scores.append(calinski_harabasz_score(filtered_X_pca, kmeans.labels_))
    db_scores.append(davies_bouldin_score(filtered_X_pca, kmeans.labels_))
    s_scores.append(silhouette_score(filtered_X_pca, kmeans.labels_))

# One panel per criterion; each title notes whether the lowest or the highest
# value is the preferred one.
fig, ax = plt.subplots(2, 2, figsize=(12, 12))
ax[0, 0].plot(k_values, wcss)
ax[0, 0].set_title('K Means with Elbow Method (Lowest)')
ax[0, 0].set_xticks(k_values)
ax[0, 1].plot(k_values, ch_scores)
ax[0, 1].set_title('K Means with Calinski-Harabasz Method (Highest)')
ax[0, 1].set_xticks(k_values)
ax[1, 0].plot(k_values, s_scores)
ax[1, 0].set_title('K Means with Silhouette Method (Highest)')
ax[1, 0].set_xticks(k_values)
ax[1, 1].plot(k_values, db_scores)
ax[1, 1].set_title('K Means with Davies-Bouldin Method (Lowest)')
ax[1, 1].set_xticks(k_values)
# plt.show() instead of fig.show(): under the inline (non-GUI) backend
# fig.show() cannot display the figure and only emits a UserWarning.
plt.show()
C:\Users\tfurr\AppData\Local\Temp\ipykernel_4064\2825464523.py:16: UserWarning: Matplotlib is currently using module://matplotlib_inline.backend_inline, which is a non-GUI backend, so cannot show the figure.
We can see from our graphs above that the silhouette, Davies-Bouldin and Calinski-Harabasz scores all give 3 clusters as the best number of clusters, just like with the original filtered data. We will perform k-means with 3 clusters and see the results.
# Fit the chosen 3-cluster solution on the 4-component PCA scores of the
# filtered data. random_state matches the seed used in the model-selection
# loop so that the clustering inspected here is the one that was scored, and
# so that the cluster numbers stay the same every time the notebook is re-run.
kmeans = KMeans(n_clusters=3, random_state=9)
kmeans.fit(filtered_X_pca)
labels3_pca = kmeans.labels_
# Assign through .loc so pandas does not warn about setting on a copy of a slice
filtered_df.loc[:, 'labels_3_pca'] = labels3_pca
filtered_df.head()
| Player | Team | Age | Min | PTS | FGM | FGA | FG% | 3PM | 3PA | 3P% | FTM | FTA | FT% | OREB | DREB | REB | AST | TOV | STL | BLK | PF | FP | DD2 | TD3 | +/- | OFFRTG | DEFRTG | NETRTG | AST% | AST/TO | AST RATIO | OREB% | DREB% | REB% | TO RATIO | EFG% | TS% | USG% | PACE | PIE | POSS | GP | W | L | %FGA2PT | %FGA3PT | %PTS2PT | %PTS2PT MR | %PTS3PT | %PTSFBPs | %PTSFT | %PTSOFFTO | %PTSPITP | 2FGM%AST | 2FGM%UAST | 3FGM%AST | 3FGM%UAST | FGM%AST | FGM%UAST | labels_6 | labels_17 | labels_3 | labels_3_pca | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Joel Embiid | PHI | 29 | 34.6 | 33.1 | 11.0 | 20.1 | 54.8 | 1.0 | 3.0 | 33.0 | 10.0 | 11.7 | 85.7 | 1.7 | 8.4 | 10.2 | 4.2 | 3.4 | 1.0 | 1.7 | 3.1 | 56.2 | 39.0 | 1.0 | 6.4 | 119.0 | 110.2 | 8.8 | 23.3 | 1.21 | 12.7 | 5.7 | 24.3 | 15.7 | 10.5 | 57.3 | 65.5 | 37.0 | 97.34 | 21.3 | 4639 | 66 | 43 | 23 | 84.9 | 15.1 | 60.7 | 15.7 | 9.1 | 6.3 | 30.2 | 12.7 | 45.0 | 60.0 | 40.0 | 89.4 | 10.6 | 62.6 | 37.4 | 4 | 13 | 2 | 0 |
| 1 | Luka Doncic | DAL | 24 | 36.2 | 32.4 | 10.9 | 22.0 | 49.6 | 2.8 | 8.2 | 34.2 | 7.8 | 10.5 | 74.2 | 0.8 | 7.8 | 8.6 | 8.0 | 3.6 | 1.4 | 0.5 | 2.5 | 56.8 | 36.0 | 10.0 | 1.9 | 118.1 | 116.0 | 2.1 | 40.8 | 2.24 | 21.1 | 2.4 | 22.4 | 12.4 | 9.4 | 56.0 | 60.9 | 36.8 | 97.63 | 20.2 | 4874 | 66 | 33 | 33 | 62.7 | 37.3 | 50.0 | 8.5 | 26.0 | 5.2 | 24.0 | 14.0 | 41.4 | 13.1 | 86.9 | 21.6 | 78.4 | 15.3 | 84.7 | 4 | 1 | 2 | 0 |
| 2 | Damian Lillard | POR | 32 | 36.3 | 32.2 | 9.6 | 20.7 | 46.3 | 4.2 | 11.3 | 37.1 | 8.8 | 9.6 | 91.4 | 0.8 | 4.0 | 4.8 | 7.3 | 3.3 | 0.9 | 0.3 | 1.9 | 49.1 | 16.0 | 2.0 | 1.8 | 119.5 | 117.4 | 2.1 | 33.8 | 2.23 | 20.8 | 2.2 | 11.2 | 6.8 | 9.3 | 56.4 | 64.5 | 33.1 | 99.78 | 17.3 | 4385 | 58 | 27 | 31 | 45.3 | 54.7 | 33.4 | 6.0 | 39.2 | 8.6 | 27.4 | 11.9 | 27.4 | 15.7 | 84.3 | 51.6 | 48.4 | 31.5 | 68.5 | 4 | 1 | 2 | 0 |
| 3 | Shai Gilgeous-Alexander | OKC | 24 | 35.5 | 31.4 | 10.4 | 20.3 | 51.0 | 0.9 | 2.5 | 34.5 | 9.8 | 10.9 | 90.5 | 0.9 | 4.0 | 4.8 | 5.5 | 2.8 | 1.6 | 1.0 | 2.8 | 50.4 | 3.0 | 0.0 | 2.2 | 116.1 | 113.3 | 2.7 | 24.9 | 1.93 | 16.4 | 2.3 | 10.9 | 6.5 | 8.5 | 53.1 | 62.6 | 31.8 | 103.47 | 17.5 | 5211 | 68 | 33 | 35 | 87.8 | 12.2 | 60.5 | 9.7 | 8.1 | 12.7 | 31.4 | 17.7 | 50.8 | 20.1 | 79.9 | 31.0 | 69.0 | 21.0 | 79.0 | 4 | 1 | 2 | 0 |
| 4 | Giannis Antetokounmpo | MIL | 28 | 32.1 | 31.1 | 11.2 | 20.3 | 55.3 | 0.7 | 2.7 | 27.5 | 7.9 | 12.3 | 64.5 | 2.2 | 9.6 | 11.8 | 5.7 | 3.9 | 0.8 | 0.8 | 3.1 | 54.8 | 46.0 | 6.0 | 5.4 | 116.4 | 109.2 | 7.2 | 31.4 | 1.46 | 16.2 | 6.5 | 26.8 | 17.1 | 11.1 | 57.2 | 60.5 | 37.3 | 103.63 | 20.4 | 4380 | 63 | 47 | 16 | 86.6 | 13.4 | 67.4 | 7.0 | 7.2 | 17.5 | 25.4 | 12.9 | 60.3 | 41.5 | 58.5 | 61.7 | 38.3 | 42.9 | 57.1 | 4 | 13 | 2 | 0 |
# Count how many rows were assigned to each PCA-based cluster
filtered_df['labels_3_pca'].value_counts()
1 217 0 109 2 90 Name: labels_3_pca, dtype: int64
# Assemble a plotting frame: player name, the first three columns of
# filtered_X_pca, and the cluster id stored on filtered_df.
filtered_df_new_pca = pd.DataFrame({'Player': filtered_df.Player})
filtered_df_new_pca['PC1'] = filtered_X_pca[:, 0]
filtered_df_new_pca['PC2'] = filtered_X_pca[:, 1]
filtered_df_new_pca['PC3'] = filtered_X_pca[:, 2]
# Categorical dtype makes plotly colour the clusters as discrete groups
# rather than along a continuous scale.
filtered_df_new_pca['Cluster'] = filtered_df.labels_3_pca.astype('category')

# Options shared by the 2-D and 3-D scatter plots
scatter_opts = dict(color='Cluster', hover_data=['Player'])

# First two components
fig = px.scatter(
    filtered_df_new_pca,
    x='PC1',
    y='PC2',
    title="2023 NBA Regular Filtered PCA Season Stats Clustering on 2 Components",
    **scatter_opts,
)
fig.show()

# First three components
fig = px.scatter_3d(
    filtered_df_new_pca,
    x='PC1',
    y='PC2',
    z='PC3',
    title="2023 NBA Regular Filtered PCA Season Stats Clustering on 3 Components",
    **scatter_opts,
)
fig.show()
In terms of the players in each cluster, PCA and the regular filtered data aren't that different. That tells me that the players who were only playing a few minutes were causing some problems. In terms of the visualizations, PCA obviously makes it easier to see the clusters since we are looking at components and not actual variables. The interesting thing is that I can see a more definitive break in the clusters, so I can essentially label them. Cluster 2 contains your more traditional high-rebounding and shot-blocking, lower three-point-shooting big men. This includes Mitchell Robinson, Rudy Gobert, Deandre Ayton, etc. Cluster 0 is your more traditional high-volume players. Essentially, these are the best players on their teams and can often be considered superstars. This includes Luka Doncic, Devin Booker, Giannis Antetokounmpo, Nikola Jokic and many others. Cluster 1 is essentially everyone else. This includes 3-and-D players (Grant Williams, Cam Johnson, Michael Porter Jr.) and other role players, along with those who don't play as much. I like this because we can see the break in where players end up a little more clearly.